From c13c8260da3155f2cefb63b0d1b7dcdcb405c644 Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Tue, 23 May 2006 17:18:44 -0700 Subject: [I/OAT]: DMA memcpy subsystem Provides an API for offloading memory copies to DMA devices Signed-off-by: Chris Leech Signed-off-by: David S. Miller --- include/linux/dmaengine.h | 337 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 337 insertions(+) create mode 100644 include/linux/dmaengine.h (limited to 'include') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h new file mode 100644 index 0000000000..30781546ac --- /dev/null +++ b/include/linux/dmaengine.h @@ -0,0 +1,337 @@ +/* + * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called COPYING. + */ +#ifndef DMAENGINE_H +#define DMAENGINE_H +#include +#ifdef CONFIG_DMA_ENGINE + +#include +#include +#include +#include +#include + +/** + * enum dma_event - resource PNP/power managment events + * @DMA_RESOURCE_SUSPEND: DMA device going into low power state + * @DMA_RESOURCE_RESUME: DMA device returning to full power + * @DMA_RESOURCE_ADDED: DMA device added to the system + * @DMA_RESOURCE_REMOVED: DMA device removed from the system + */ +enum dma_event { + DMA_RESOURCE_SUSPEND, + DMA_RESOURCE_RESUME, + DMA_RESOURCE_ADDED, + DMA_RESOURCE_REMOVED, +}; + +/** + * typedef dma_cookie_t + * + * if dma_cookie_t is >0 it's a DMA request cookie, <0 it's an error code + */ +typedef s32 dma_cookie_t; + +#define dma_submit_error(cookie) ((cookie) < 0 ? 1 : 0) + +/** + * enum dma_status - DMA transaction status + * @DMA_SUCCESS: transaction completed successfully + * @DMA_IN_PROGRESS: transaction not yet processed + * @DMA_ERROR: transaction failed + */ +enum dma_status { + DMA_SUCCESS, + DMA_IN_PROGRESS, + DMA_ERROR, +}; + +/** + * struct dma_chan_percpu - the per-CPU part of struct dma_chan + * @refcount: local_t used for open-coded "bigref" counting + * @memcpy_count: transaction counter + * @bytes_transferred: byte counter + */ + +struct dma_chan_percpu { + local_t refcount; + /* stats */ + unsigned long memcpy_count; + unsigned long bytes_transferred; +}; + +/** + * struct dma_chan - devices supply DMA channels, clients use them + * @client: ptr to the client user of this chan, will be NULL when unused + * @device: ptr to the dma device who supplies this channel, always !NULL + * @cookie: last cookie value returned to client + * @chan_id: + * @class_dev: + * @refcount: kref, used in "bigref" slow-mode + * @slow_ref: + * @rcu: + * @client_node: used to add this to the client chan list + * @device_node: used to add this to the device chan list + * @local: per-cpu pointer to a struct dma_chan_percpu + */ +struct dma_chan { + struct dma_client *client; + struct dma_device *device; + dma_cookie_t cookie; + + /* sysfs */ + int chan_id; + struct class_device class_dev; + + struct kref refcount; + int slow_ref; + struct rcu_head rcu; + + struct list_head client_node; + struct list_head device_node; + struct dma_chan_percpu *local; +}; + +void dma_chan_cleanup(struct kref *kref); + +static inline void dma_chan_get(struct dma_chan *chan) +{ + if (unlikely(chan->slow_ref)) + kref_get(&chan->refcount); + else { + local_inc(&(per_cpu_ptr(chan->local, get_cpu())->refcount)); + put_cpu(); + } +} + +static inline void dma_chan_put(struct dma_chan *chan) +{ + if (unlikely(chan->slow_ref)) + kref_put(&chan->refcount, dma_chan_cleanup); + else { + local_dec(&(per_cpu_ptr(chan->local, get_cpu())->refcount)); + put_cpu(); + } +} + +/* + * typedef dma_event_callback - function pointer to a DMA event callback + */ +typedef void (*dma_event_callback) (struct dma_client *client, + struct dma_chan *chan, enum dma_event event); + +/** + * struct dma_client - info on the entity making use of DMA services + * @event_callback: func ptr to call when something happens + * @chan_count: number of chans allocated + * @chans_desired: number of chans requested. Can be +/- chan_count + * @lock: protects access to the channels list + * @channels: the list of DMA channels allocated + * @global_node: list_head for global dma_client_list + */ +struct dma_client { + dma_event_callback event_callback; + unsigned int chan_count; + unsigned int chans_desired; + + spinlock_t lock; + struct list_head channels; + struct list_head global_node; +}; + +/** + * struct dma_device - info on the entity supplying DMA services + * @chancnt: how many DMA channels are supported + * @channels: the list of struct dma_chan + * @global_node: list_head for global dma_device_list + * @refcount: + * @done: + * @dev_id: + * Other func ptrs: used to make use of this device's capabilities + */ +struct dma_device { + + unsigned int chancnt; + struct list_head channels; + struct list_head global_node; + + struct kref refcount; + struct completion done; + + int dev_id; + + int (*device_alloc_chan_resources)(struct dma_chan *chan); + void (*device_free_chan_resources)(struct dma_chan *chan); + dma_cookie_t (*device_memcpy_buf_to_buf)(struct dma_chan *chan, + void *dest, void *src, size_t len); + dma_cookie_t (*device_memcpy_buf_to_pg)(struct dma_chan *chan, + struct page *page, unsigned int offset, void *kdata, + size_t len); + dma_cookie_t (*device_memcpy_pg_to_pg)(struct dma_chan *chan, + struct page *dest_pg, unsigned int dest_off, + struct page *src_pg, unsigned int src_off, size_t len); + enum dma_status (*device_memcpy_complete)(struct dma_chan *chan, + dma_cookie_t cookie, dma_cookie_t *last, + dma_cookie_t *used); + void (*device_memcpy_issue_pending)(struct dma_chan *chan); +}; + +/* --- public DMA engine API --- */ + +struct dma_client *dma_async_client_register(dma_event_callback event_callback); +void dma_async_client_unregister(struct dma_client *client); +void dma_async_client_chan_request(struct dma_client *client, + unsigned int number); + +/** + * dma_async_memcpy_buf_to_buf - offloaded copy between virtual addresses + * @chan: DMA channel to offload copy to + * @dest: destination address (virtual) + * @src: source address (virtual) + * @len: length + * + * Both @dest and @src must be mappable to a bus address according to the + * DMA mapping API rules for streaming mappings. + * Both @dest and @src must stay memory resident (kernel memory or locked + * user space pages) + */ +static inline dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan, + void *dest, void *src, size_t len) +{ + int cpu = get_cpu(); + per_cpu_ptr(chan->local, cpu)->bytes_transferred += len; + per_cpu_ptr(chan->local, cpu)->memcpy_count++; + put_cpu(); + + return chan->device->device_memcpy_buf_to_buf(chan, dest, src, len); +} + +/** + * dma_async_memcpy_buf_to_pg - offloaded copy + * @chan: DMA channel to offload copy to + * @page: destination page + * @offset: offset in page to copy to + * @kdata: source address (virtual) + * @len: length + * + * Both @page/@offset and @kdata must be mappable to a bus address according + * to the DMA mapping API rules for streaming mappings. + * Both @page/@offset and @kdata must stay memory resident (kernel memory or + * locked user space pages) + */ +static inline dma_cookie_t dma_async_memcpy_buf_to_pg(struct dma_chan *chan, + struct page *page, unsigned int offset, void *kdata, size_t len) +{ + int cpu = get_cpu(); + per_cpu_ptr(chan->local, cpu)->bytes_transferred += len; + per_cpu_ptr(chan->local, cpu)->memcpy_count++; + put_cpu(); + + return chan->device->device_memcpy_buf_to_pg(chan, page, offset, + kdata, len); +} + +/** + * dma_async_memcpy_buf_to_pg - offloaded copy + * @chan: DMA channel to offload copy to + * @dest_page: destination page + * @dest_off: offset in page to copy to + * @src_page: source page + * @src_off: offset in page to copy from + * @len: length + * + * Both @dest_page/@dest_off and @src_page/@src_off must be mappable to a bus + * address according to the DMA mapping API rules for streaming mappings. + * Both @dest_page/@dest_off and @src_page/@src_off must stay memory resident + * (kernel memory or locked user space pages) + */ +static inline dma_cookie_t dma_async_memcpy_pg_to_pg(struct dma_chan *chan, + struct page *dest_pg, unsigned int dest_off, struct page *src_pg, + unsigned int src_off, size_t len) +{ + int cpu = get_cpu(); + per_cpu_ptr(chan->local, cpu)->bytes_transferred += len; + per_cpu_ptr(chan->local, cpu)->memcpy_count++; + put_cpu(); + + return chan->device->device_memcpy_pg_to_pg(chan, dest_pg, dest_off, + src_pg, src_off, len); +} + +/** + * dma_async_memcpy_issue_pending - flush pending copies to HW + * @chan: + * + * This allows drivers to push copies to HW in batches, + * reducing MMIO writes where possible. + */ +static inline void dma_async_memcpy_issue_pending(struct dma_chan *chan) +{ + return chan->device->device_memcpy_issue_pending(chan); +} + +/** + * dma_async_memcpy_complete - poll for transaction completion + * @chan: DMA channel + * @cookie: transaction identifier to check status of + * @last: returns last completed cookie, can be NULL + * @used: returns last issued cookie, can be NULL + * + * If @last and @used are passed in, upon return they reflect the driver + * internal state and can be used with dma_async_is_complete() to check + * the status of multiple cookies without re-checking hardware state. + */ +static inline enum dma_status dma_async_memcpy_complete(struct dma_chan *chan, + dma_cookie_t cookie, dma_cookie_t *last, dma_cookie_t *used) +{ + return chan->device->device_memcpy_complete(chan, cookie, last, used); +} + +/** + * dma_async_is_complete - test a cookie against chan state + * @cookie: transaction identifier to test status of + * @last_complete: last know completed transaction + * @last_used: last cookie value handed out + * + * dma_async_is_complete() is used in dma_async_memcpy_complete() + * the test logic is seperated for lightweight testing of multiple cookies + */ +static inline enum dma_status dma_async_is_complete(dma_cookie_t cookie, + dma_cookie_t last_complete, dma_cookie_t last_used) +{ + if (last_complete <= last_used) { + if ((cookie <= last_complete) || (cookie > last_used)) + return DMA_SUCCESS; + } else { + if ((cookie <= last_complete) && (cookie > last_used)) + return DMA_SUCCESS; + } + return DMA_IN_PROGRESS; +} + + +/* --- DMA device --- */ + +int dma_async_device_register(struct dma_device *device); +void dma_async_device_unregister(struct dma_device *device); + +#endif /* CONFIG_DMA_ENGINE */ +#endif /* DMAENGINE_H */ -- cgit v1.2.2 From 57c651f74cd8383df10a648e677902849de1bc0b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 23 May 2006 17:39:49 -0700 Subject: [I/OAT]: Move PCI_DEVICE_ID_INTEL_IOAT to linux/pci_ids.h Signed-off-by: David S. Miller --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 590dc6dca3..819c8e1324 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2043,6 +2043,7 @@ #define PCI_DEVICE_ID_INTEL_80960_RP 0x1960 #define PCI_DEVICE_ID_INTEL_82840_HB 0x1a21 #define PCI_DEVICE_ID_INTEL_82845_HB 0x1a30 +#define PCI_DEVICE_ID_INTEL_IOAT 0x1a38 #define PCI_DEVICE_ID_INTEL_82801AA_0 0x2410 #define PCI_DEVICE_ID_INTEL_82801AA_1 0x2411 #define PCI_DEVICE_ID_INTEL_82801AA_3 0x2413 -- cgit v1.2.2 From db21733488f84a596faaad0d05430b3f51804692 Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Sat, 17 Jun 2006 21:24:58 -0700 Subject: [I/OAT]: Setup the networking subsystem as a DMA client Attempts to allocate per-CPU DMA channels Signed-off-by: Chris Leech Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++++ include/net/netdma.h | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) create mode 100644 include/net/netdma.h (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f4169bbb60..b5760c67af 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -37,6 +37,7 @@ #include #include #include +#include struct divert_blk; struct vlan_group; @@ -593,6 +594,9 @@ struct softnet_data struct sk_buff *completion_queue; struct net_device backlog_dev; /* Sorry. 8) */ +#ifdef CONFIG_NET_DMA + struct dma_chan *net_dma; +#endif }; DECLARE_PER_CPU(struct softnet_data,softnet_data); diff --git a/include/net/netdma.h b/include/net/netdma.h new file mode 100644 index 0000000000..cbfe89d7e5 --- /dev/null +++ b/include/net/netdma.h @@ -0,0 +1,38 @@ +/* + * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called COPYING. + */ +#ifndef NETDMA_H +#define NETDMA_H +#include +#ifdef CONFIG_NET_DMA +#include + +static inline struct dma_chan *get_softnet_dma(void) +{ + struct dma_chan *chan; + rcu_read_lock(); + chan = rcu_dereference(__get_cpu_var(softnet_data.net_dma)); + if (chan) + dma_chan_get(chan); + rcu_read_unlock(); + return chan; +} +#endif /* CONFIG_NET_DMA */ +#endif /* NETDMA_H */ -- cgit v1.2.2 From de5506e155276d385712c2aa1c2d9a27cd4ed947 Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Tue, 23 May 2006 17:50:37 -0700 Subject: [I/OAT]: Utility functions for offloading sk_buff to iovec copies Provides for pinning user space pages in memory, copying to iovecs, and copying from sk_buffs including fragmented and chained sk_buffs. Signed-off-by: Chris Leech Signed-off-by: David S. Miller --- include/linux/dmaengine.h | 22 ++++++++++++++++++++++ include/net/netdma.h | 6 ++++++ 2 files changed, 28 insertions(+) (limited to 'include') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 30781546ac..78b236ca04 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -333,5 +333,27 @@ static inline enum dma_status dma_async_is_complete(dma_cookie_t cookie, int dma_async_device_register(struct dma_device *device); void dma_async_device_unregister(struct dma_device *device); +/* --- Helper iov-locking functions --- */ + +struct dma_page_list { + char *base_address; + int nr_pages; + struct page **pages; +}; + +struct dma_pinned_list { + int nr_iovecs; + struct dma_page_list page_list[0]; +}; + +struct dma_pinned_list *dma_pin_iovec_pages(struct iovec *iov, size_t len); +void dma_unpin_iovec_pages(struct dma_pinned_list* pinned_list); + +dma_cookie_t dma_memcpy_to_iovec(struct dma_chan *chan, struct iovec *iov, + struct dma_pinned_list *pinned_list, unsigned char *kdata, size_t len); +dma_cookie_t dma_memcpy_pg_to_iovec(struct dma_chan *chan, struct iovec *iov, + struct dma_pinned_list *pinned_list, struct page *page, + unsigned int offset, size_t len); + #endif /* CONFIG_DMA_ENGINE */ #endif /* DMAENGINE_H */ diff --git a/include/net/netdma.h b/include/net/netdma.h index cbfe89d7e5..19760eb131 100644 --- a/include/net/netdma.h +++ b/include/net/netdma.h @@ -23,6 +23,7 @@ #include #ifdef CONFIG_NET_DMA #include +#include static inline struct dma_chan *get_softnet_dma(void) { @@ -34,5 +35,10 @@ static inline struct dma_chan *get_softnet_dma(void) rcu_read_unlock(); return chan; } + +int dma_skb_copy_datagram_iovec(struct dma_chan* chan, + const struct sk_buff *skb, int offset, struct iovec *to, + size_t len, struct dma_pinned_list *pinned_list); + #endif /* CONFIG_NET_DMA */ #endif /* NETDMA_H */ -- cgit v1.2.2 From 97fc2f0848c928c63c2ae619deee61a0b1107b69 Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Tue, 23 May 2006 17:55:33 -0700 Subject: [I/OAT]: Structure changes for TCP recv offload to I/OAT Adds an async_wait_queue and some additional fields to tcp_sock, and a dma_cookie_t to sk_buff. Signed-off-by: Chris Leech Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 ++++ include/linux/tcp.h | 8 ++++++++ include/net/sock.h | 2 ++ include/net/tcp.h | 7 +++++++ 4 files changed, 21 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f8f234708b..23bad3bf3c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -29,6 +29,7 @@ #include #include #include +#include #define HAVE_ALLOC_SKB /* For the drivers to know */ #define HAVE_ALIGNABLE_SKB /* Ditto 8) */ @@ -285,6 +286,9 @@ struct sk_buff { __u16 tc_verd; /* traffic control verdict */ #endif #endif +#ifdef CONFIG_NET_DMA + dma_cookie_t dma_cookie; +#endif /* These elements must be at the end, see alloc_skb() for details. */ diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 542d39596b..c90daa5da6 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -18,6 +18,7 @@ #define _LINUX_TCP_H #include +#include #include struct tcphdr { @@ -233,6 +234,13 @@ struct tcp_sock { struct iovec *iov; int memory; int len; +#ifdef CONFIG_NET_DMA + /* members for async copy */ + struct dma_chan *dma_chan; + int wakeup; + struct dma_pinned_list *pinned_list; + dma_cookie_t dma_cookie; +#endif } ucopy; __u32 snd_wl1; /* Sequence for window update */ diff --git a/include/net/sock.h b/include/net/sock.h index c9fad6fb62..90c65cb091 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -132,6 +132,7 @@ struct sock_common { * @sk_receive_queue: incoming packets * @sk_wmem_alloc: transmit queue bytes committed * @sk_write_queue: Packet sending queue + * @sk_async_wait_queue: DMA copied packets * @sk_omem_alloc: "o" is "option" or "other" * @sk_wmem_queued: persistent queue size * @sk_forward_alloc: space allocated forward @@ -205,6 +206,7 @@ struct sock { atomic_t sk_omem_alloc; struct sk_buff_head sk_receive_queue; struct sk_buff_head sk_write_queue; + struct sk_buff_head sk_async_wait_queue; int sk_wmem_queued; int sk_forward_alloc; gfp_t sk_allocation; diff --git a/include/net/tcp.h b/include/net/tcp.h index 3c989db8a7..d0c2c2fa75 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -817,6 +818,12 @@ static inline void tcp_prequeue_init(struct tcp_sock *tp) tp->ucopy.len = 0; tp->ucopy.memory = 0; skb_queue_head_init(&tp->ucopy.prequeue); +#ifdef CONFIG_NET_DMA + tp->ucopy.dma_chan = NULL; + tp->ucopy.wakeup = 0; + tp->ucopy.pinned_list = NULL; + tp->ucopy.dma_cookie = 0; +#endif } /* Packet is added to VJ-style prequeue for processing in process -- cgit v1.2.2 From 0e4b4992b8007c6b62ec143cbbb292f98813ca11 Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Tue, 23 May 2006 18:00:16 -0700 Subject: [I/OAT]: Rename cleanup_rbuf to tcp_cleanup_rbuf and make non-static Needed to be able to call tcp_cleanup_rbuf in tcp_input.c for I/OAT Signed-off-by: Chris Leech Signed-off-by: David S. Miller --- include/net/tcp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index d0c2c2fa75..578cccf275 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -294,6 +294,8 @@ extern int tcp_rcv_established(struct sock *sk, extern void tcp_rcv_space_adjust(struct sock *sk); +extern void tcp_cleanup_rbuf(struct sock *sk, int copied); + extern int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp); -- cgit v1.2.2 From 624d1164730d58a494cc5aa4afa37d02c41e83a7 Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Tue, 23 May 2006 18:01:28 -0700 Subject: [I/OAT]: Make sk_eat_skb I/OAT aware. Add an extra argument to sk_eat_skb, and make it move early copied packets to the async_wait_queue instead of freeing them. Signed-off-by: Chris Leech Signed-off-by: David S. Miller --- include/net/sock.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 90c65cb091..75b0e97ed9 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1273,11 +1273,22 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) * This routine must be called with interrupts disabled or with the socket * locked so that the sk_buff queue operation is ok. */ -static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb) +#ifdef CONFIG_NET_DMA +static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb, int copied_early) +{ + __skb_unlink(skb, &sk->sk_receive_queue); + if (!copied_early) + __kfree_skb(skb); + else + __skb_queue_tail(&sk->sk_async_wait_queue, skb); +} +#else +static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb, int copied_early) { __skb_unlink(skb, &sk->sk_receive_queue); __kfree_skb(skb); } +#endif extern void sock_enable_timestamp(struct sock *sk); extern int sock_get_timestamp(struct sock *, struct timeval __user *); -- cgit v1.2.2 From 9593782585e0cf70babe787a8463d492a68b1744 Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Tue, 23 May 2006 18:02:55 -0700 Subject: [I/OAT]: Add a sysctl for tuning the I/OAT offloaded I/O threshold Any socket recv of less than this ammount will not be offloaded Signed-off-by: Chris Leech Signed-off-by: David S. Miller --- include/linux/sysctl.h | 1 + include/net/tcp.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 76eaeff76f..cd9e7c0825 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -403,6 +403,7 @@ enum NET_TCP_MTU_PROBING=113, NET_TCP_BASE_MSS=114, NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS=115, + NET_TCP_DMA_COPYBREAK=116, }; enum { diff --git a/include/net/tcp.h b/include/net/tcp.h index 578cccf275..f1f472746e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -219,6 +219,7 @@ extern int sysctl_tcp_adv_win_scale; extern int sysctl_tcp_tw_reuse; extern int sysctl_tcp_frto; extern int sysctl_tcp_low_latency; +extern int sysctl_tcp_dma_copybreak; extern int sysctl_tcp_nometrics_save; extern int sysctl_tcp_moderate_rcvbuf; extern int sysctl_tcp_tso_win_divisor; -- cgit v1.2.2 From aecbd4e45c2e469e0452ffb2c0b0d881e2815bb8 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 25 May 2006 15:08:30 -0700 Subject: [LLC]: use more efficient ether address routines Use more cache efficient Ethernet address manipulation functions in etherdevice.h. Signed-off-by: Stephen Hemminger --- include/net/llc_if.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/llc_if.h b/include/net/llc_if.h index 090eaa0d71..a05d04ac45 100644 --- a/include/net/llc_if.h +++ b/include/net/llc_if.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #define LLC_DATAUNIT_PRIM 1 @@ -61,8 +62,6 @@ #define LLC_STATUS_CONFLICT 7 /* disconnect conn */ #define LLC_STATUS_RESET_DONE 8 /* */ -extern u8 llc_mac_null_var[IFHWADDRLEN]; - /** * llc_mac_null - determines if a address is a null mac address * @mac: Mac address to test if null. @@ -70,12 +69,12 @@ extern u8 llc_mac_null_var[IFHWADDRLEN]; * Determines if a given address is a null mac address. Returns 0 if the * address is not a null mac, 1 if the address is a null mac. */ -static __inline__ int llc_mac_null(u8 *mac) +static inline int llc_mac_null(const u8 *mac) { - return !memcmp(mac, llc_mac_null_var, IFHWADDRLEN); + return is_zero_ether_addr(mac); } -static __inline__ int llc_addrany(struct llc_addr *addr) +static inline int llc_addrany(const struct llc_addr *addr) { return llc_mac_null(addr->mac) && !addr->lsap; } @@ -89,9 +88,9 @@ static __inline__ int llc_addrany(struct llc_addr *addr) * is not a complete match up to len, 1 if a complete match up to len is * found. */ -static __inline__ int llc_mac_match(u8 *mac1, u8 *mac2) +static inline int llc_mac_match(const u8 *mac1, const u8 *mac2) { - return !memcmp(mac1, mac2, IFHWADDRLEN); + return !compare_ether_addr(mac1, mac2); } extern int llc_establish_connection(struct sock *sk, u8 *lmac, -- cgit v1.2.2 From bc0e646796928918e45b6465e02616f2fe65c3c1 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 25 May 2006 15:10:37 -0700 Subject: [LLC]: add multicast support for datagrams Allow mulitcast reception of datagrams (similar to UDP). All sockets bound to the same SAP receive a clone. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/llc_if.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/llc_if.h b/include/net/llc_if.h index a05d04ac45..c608812a8e 100644 --- a/include/net/llc_if.h +++ b/include/net/llc_if.h @@ -79,6 +79,10 @@ static inline int llc_addrany(const struct llc_addr *addr) return llc_mac_null(addr->mac) && !addr->lsap; } +static inline int llc_mac_multicast(const u8 *mac) +{ + return is_multicast_ether_addr(mac); +} /** * llc_mac_match - determines if two mac addresses are the same * @mac1: First mac address to compare. -- cgit v1.2.2 From 30b6c28d2aca4669f2e609ad5d77ea2a6cf0dd3a Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 26 May 2006 17:44:45 -0700 Subject: [TG3]: Add 5786 PCI ID Add PCI ID for BCM5786 which is a variant of 5787. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 819c8e1324..cf45e8bb69 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1887,6 +1887,7 @@ #define PCI_DEVICE_ID_TIGON3_5751F 0x167e #define PCI_DEVICE_ID_TIGON3_5787M 0x1693 #define PCI_DEVICE_ID_TIGON3_5782 0x1696 +#define PCI_DEVICE_ID_TIGON3_5786 0x169a #define PCI_DEVICE_ID_TIGON3_5787 0x169b #define PCI_DEVICE_ID_TIGON3_5788 0x169c #define PCI_DEVICE_ID_TIGON3_5789 0x169d -- cgit v1.2.2 From 546be2405be119ef55467aace45f337a16e5d424 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 27 May 2006 23:03:58 -0700 Subject: [IPSEC] xfrm: Undo afinfo lock proliferation The number of locks used to manage afinfo structures can easily be reduced down to one each for policy and state respectively. This is based on the observation that the write locks are only held by module insertion/removal which are very rare events so there is no need to further differentiate between the insertion of modules like ipv6 versus esp6. The removal of the read locks in xfrm4_policy.c/xfrm6_policy.c might look suspicious at first. However, after you realise that nobody ever takes the corresponding write lock you'll feel better :) As far as I can gather it's an attempt to guard against the removal of the corresponding modules. Since neither module can be unloaded at all we can leave it to whoever fixes up IPv6 unloading :) Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/xfrm.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index afa508d92c..ed7c974705 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -204,8 +204,7 @@ struct xfrm_type; struct xfrm_dst; struct xfrm_policy_afinfo { unsigned short family; - rwlock_t lock; - struct xfrm_type_map *type_map; + struct xfrm_type *type_map[256]; struct dst_ops *dst_ops; void (*garbage_collect)(void); int (*dst_lookup)(struct xfrm_dst **dst, struct flowi *fl); @@ -232,7 +231,6 @@ extern int __xfrm_state_delete(struct xfrm_state *x); struct xfrm_state_afinfo { unsigned short family; - rwlock_t lock; struct list_head *state_bydst; struct list_head *state_byspi; int (*init_flags)(struct xfrm_state *x); @@ -264,11 +262,6 @@ struct xfrm_type u32 (*get_max_size)(struct xfrm_state *, int size); }; -struct xfrm_type_map { - rwlock_t lock; - struct xfrm_type *map[256]; -}; - extern int xfrm_register_type(struct xfrm_type *type, unsigned short family); extern int xfrm_unregister_type(struct xfrm_type *type, unsigned short family); extern struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family); -- cgit v1.2.2 From b59f45d0b2878ab76f8053b0973654e6621828ee Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 27 May 2006 23:05:54 -0700 Subject: [IPSEC] xfrm: Abstract out encapsulation modes This patch adds the structure xfrm_mode. It is meant to represent the operations carried out by transport/tunnel modes. By doing this we allow additional encapsulation modes to be added without clogging up the xfrm_input/xfrm_output paths. Candidate modes include 4-to-6 tunnel mode, 6-to-4 tunnel mode, and BEET modes. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/xfrm.h | 4 ++++ include/net/xfrm.h | 17 +++++++++++++++++ 2 files changed, 21 insertions(+) (limited to 'include') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 6b42cc474c..46a15c7a1a 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -118,6 +118,10 @@ enum XFRM_SHARE_UNIQUE /* Use once */ }; +#define XFRM_MODE_TRANSPORT 0 +#define XFRM_MODE_TUNNEL 1 +#define XFRM_MODE_MAX 2 + /* Netlink configuration messages. */ enum { XFRM_MSG_BASE = 0x10, diff --git a/include/net/xfrm.h b/include/net/xfrm.h index ed7c974705..ed5bb34f81 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -20,6 +20,8 @@ #include #define XFRM_ALIGN8(len) (((len) + 7) & ~7) +#define MODULE_ALIAS_XFRM_MODE(family, encap) \ + MODULE_ALIAS("xfrm-mode-" __stringify(family) "-" __stringify(encap)) extern struct sock *xfrm_nl; extern u32 sysctl_xfrm_aevent_etime; @@ -164,6 +166,7 @@ struct xfrm_state /* Reference to data common to all the instances of this * transformer. */ struct xfrm_type *type; + struct xfrm_mode *mode; /* Security context */ struct xfrm_sec_ctx *security; @@ -205,6 +208,7 @@ struct xfrm_dst; struct xfrm_policy_afinfo { unsigned short family; struct xfrm_type *type_map[256]; + struct xfrm_mode *mode_map[XFRM_MODE_MAX]; struct dst_ops *dst_ops; void (*garbage_collect)(void); int (*dst_lookup)(struct xfrm_dst **dst, struct flowi *fl); @@ -267,6 +271,19 @@ extern int xfrm_unregister_type(struct xfrm_type *type, unsigned short family); extern struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family); extern void xfrm_put_type(struct xfrm_type *type); +struct xfrm_mode { + int (*input)(struct xfrm_state *x, struct sk_buff *skb); + int (*output)(struct sk_buff *skb); + + struct module *owner; + unsigned int encap; +}; + +extern int xfrm_register_mode(struct xfrm_mode *mode, int family); +extern int xfrm_unregister_mode(struct xfrm_mode *mode, int family); +extern struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family); +extern void xfrm_put_mode(struct xfrm_mode *mode); + struct xfrm_tmpl { /* id in template is interpreted as: -- cgit v1.2.2 From 73654d61e556483ad324b90989eae26b22df6ef6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 27 May 2006 23:06:33 -0700 Subject: [IPSEC] xfrm: Use IPPROTO_MAX instead of 256 The size of the type_map array (256) comes from the number of IP protocols, i.e., IPPROTO_MAX. This patch is based on a suggestion from Ingo Oeser. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/xfrm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index ed5bb34f81..9c5ee9f20b 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -207,7 +207,7 @@ struct xfrm_type; struct xfrm_dst; struct xfrm_policy_afinfo { unsigned short family; - struct xfrm_type *type_map[256]; + struct xfrm_type *type_map[IPPROTO_MAX]; struct xfrm_mode *mode_map[XFRM_MODE_MAX]; struct dst_ops *dst_ops; void (*garbage_collect)(void); -- cgit v1.2.2 From 62b7743483b402f8fb73545d5d487ca714e82766 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 29 May 2006 18:20:32 -0700 Subject: [NETFILTER]: x_tables: add quota match Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/xt_quota.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 include/linux/netfilter/xt_quota.h (limited to 'include') diff --git a/include/linux/netfilter/xt_quota.h b/include/linux/netfilter/xt_quota.h new file mode 100644 index 0000000000..acd7fd77bb --- /dev/null +++ b/include/linux/netfilter/xt_quota.h @@ -0,0 +1,16 @@ +#ifndef _XT_QUOTA_H +#define _XT_QUOTA_H + +enum xt_quota_flags { + XT_QUOTA_INVERT = 0x1, +}; +#define XT_QUOTA_MASK 0x1 + +struct xt_quota_info { + u_int32_t flags; + u_int32_t pad; + aligned_u64 quota; + struct xt_quota_info *master; +}; + +#endif /* _XT_QUOTA_H */ -- cgit v1.2.2 From f3389805e53a13bd969ee1c8fc5a4137b7c6c167 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 29 May 2006 18:21:00 -0700 Subject: [NETFILTER]: x_tables: add statistic match Add statistic match which is a combination of the nth and random matches. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/xt_statistic.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 include/linux/netfilter/xt_statistic.h (limited to 'include') diff --git a/include/linux/netfilter/xt_statistic.h b/include/linux/netfilter/xt_statistic.h new file mode 100644 index 0000000000..c344e9916e --- /dev/null +++ b/include/linux/netfilter/xt_statistic.h @@ -0,0 +1,32 @@ +#ifndef _XT_STATISTIC_H +#define _XT_STATISTIC_H + +enum xt_statistic_mode { + XT_STATISTIC_MODE_RANDOM, + XT_STATISTIC_MODE_NTH, + __XT_STATISTIC_MODE_MAX +}; +#define XT_STATISTIC_MODE_MAX (__XT_STATISTIC_MODE_MAX - 1) + +enum xt_statistic_flags { + XT_STATISTIC_INVERT = 0x1, +}; +#define XT_STATISTIC_MASK 0x1 + +struct xt_statistic_info { + u_int16_t mode; + u_int16_t flags; + union { + struct { + u_int32_t probability; + } random; + struct { + u_int32_t every; + u_int32_t packet; + u_int32_t count; + } nth; + } u; + struct xt_statistic_info *master __attribute__((aligned(8))); +}; + +#endif /* _XT_STATISTIC_H */ -- cgit v1.2.2 From 39a27a35c5c1b5be499a0576a35c45a011788bf8 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 29 May 2006 18:23:54 -0700 Subject: [NETFILTER]: conntrack: add sysctl to disable checksumming Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack.h | 1 + include/linux/sysctl.h | 2 ++ include/net/netfilter/nf_conntrack.h | 1 + 3 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h index d54d7b278e..5473c01f69 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h @@ -293,6 +293,7 @@ static inline int is_dying(struct ip_conntrack *ct) } extern unsigned int ip_conntrack_htable_size; +extern int ip_conntrack_checksum; #define CONNTRACK_STAT_INC(count) (__get_cpu_var(ip_conntrack_stat).count++) diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index cd9e7c0825..98338ed2c0 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -313,6 +313,7 @@ enum NET_NF_CONNTRACK_FRAG6_TIMEOUT=29, NET_NF_CONNTRACK_FRAG6_LOW_THRESH=30, NET_NF_CONNTRACK_FRAG6_HIGH_THRESH=31, + NET_NF_CONNTRACK_CHECKSUM=32, }; /* /proc/sys/net/ipv4 */ @@ -492,6 +493,7 @@ enum NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD=25, NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT=26, NET_IPV4_NF_CONNTRACK_COUNT=27, + NET_IPV4_NF_CONNTRACK_CHECKSUM=28, }; /* /proc/sys/net/ipv6 */ diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 916013ca4a..dbe7a114d0 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -285,6 +285,7 @@ static inline int nf_ct_is_dying(struct nf_conn *ct) } extern unsigned int nf_conntrack_htable_size; +extern int nf_conntrack_checksum; #define NF_CT_STAT_INC(count) (__get_cpu_var(nf_conntrack_stat).count++) -- cgit v1.2.2 From 997ae831ade74bdaed4172b1c02060b9efd6e206 Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Mon, 29 May 2006 18:24:20 -0700 Subject: [NETFILTER]: conntrack: add fixed timeout flag in connection tracking Add a flag in a connection status to have a non updated timeout. This permits to have connection that automatically die at a given time. Signed-off-by: Eric Leblond Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/nf_conntrack_common.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index 3ff88c8783..d2e4bd7a7a 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -69,6 +69,10 @@ enum ip_conntrack_status { /* Connection is dying (removed from lists), can not be unset. */ IPS_DYING_BIT = 9, IPS_DYING = (1 << IPS_DYING_BIT), + + /* Connection has fixed timeout. */ + IPS_FIXED_TIMEOUT_BIT = 10, + IPS_FIXED_TIMEOUT = (1 << IPS_FIXED_TIMEOUT_BIT), }; /* Connection tracking event bits */ -- cgit v1.2.2 From 3726add76643c715d437aceda320d319153b6113 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 29 May 2006 18:24:39 -0700 Subject: [NETFILTER]: ctnetlink: fix NAT configuration The current configuration only allows to configure one manip and overloads conntrack status flags with netlink semantic. Signed-off-by: Patrick Mchardy Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink_conntrack.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index 668ec946c8..b5883ccee2 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -27,13 +27,15 @@ enum ctattr_type { CTA_STATUS, CTA_PROTOINFO, CTA_HELP, - CTA_NAT, + CTA_NAT_SRC, +#define CTA_NAT CTA_NAT_SRC /* backwards compatibility */ CTA_TIMEOUT, CTA_MARK, CTA_COUNTERS_ORIG, CTA_COUNTERS_REPLY, CTA_USE, CTA_ID, + CTA_NAT_DST, __CTA_MAX }; #define CTA_MAX (__CTA_MAX - 1) -- cgit v1.2.2 From c0d4cfd96dd0cc0dbf49435898808b5553af4822 Mon Sep 17 00:00:00 2001 From: Jing Min Zhao Date: Mon, 29 May 2006 18:26:27 -0700 Subject: [NETFILTER]: H.323 helper: Add support for Call Forwarding Signed-off-by: Jing Min Zhao Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack.h | 1 + include/linux/netfilter_ipv4/ip_conntrack_h323.h | 7 +++++++ include/linux/netfilter_ipv4/ip_conntrack_helper_h323_types.h | 3 ++- 3 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h index 5473c01f69..17d7ef938a 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h @@ -154,6 +154,7 @@ struct ip_conntrack_expect unsigned int flags; #ifdef CONFIG_IP_NF_NAT_NEEDED + u_int32_t saved_ip; /* This is the original per-proto part, used to map the * expected connection the way the recipient expects. */ union ip_conntrack_manip_proto saved_proto; diff --git a/include/linux/netfilter_ipv4/ip_conntrack_h323.h b/include/linux/netfilter_ipv4/ip_conntrack_h323.h index eace86bd2a..3cbff73790 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_h323.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_h323.h @@ -71,6 +71,13 @@ extern int (*nat_h245_hook) (struct sk_buff ** pskb, struct ip_conntrack * ct, unsigned char **data, int dataoff, TransportAddress * addr, u_int16_t port, struct ip_conntrack_expect * exp); +extern int (*nat_callforwarding_hook) (struct sk_buff ** pskb, + struct ip_conntrack * ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, int dataoff, + TransportAddress * addr, + u_int16_t port, + struct ip_conntrack_expect * exp); extern int (*nat_q931_hook) (struct sk_buff ** pskb, struct ip_conntrack * ct, enum ip_conntrack_info ctinfo, unsigned char **data, TransportAddress * addr, diff --git a/include/linux/netfilter_ipv4/ip_conntrack_helper_h323_types.h b/include/linux/netfilter_ipv4/ip_conntrack_helper_h323_types.h index cc98f7aa5a..3d4a773799 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_helper_h323_types.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_helper_h323_types.h @@ -1,4 +1,4 @@ -/* Generated by Jing Min Zhao's ASN.1 parser, Mar 15 2006 +/* Generated by Jing Min Zhao's ASN.1 parser, Apr 20 2006 * * Copyright (c) 2006 Jing Min Zhao * @@ -412,6 +412,7 @@ typedef struct Facility_UUIE { /* SEQUENCE */ eFacility_UUIE_destinationInfo = (1 << 14), eFacility_UUIE_h245SecurityMode = (1 << 13), } options; + TransportAddress alternativeAddress; FacilityReason reason; TransportAddress h245Address; Facility_UUIE_fastStart fastStart; -- cgit v1.2.2 From ae5b7d8ba2c28d7d9835856fe0ca5f6ec95ea768 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 29 May 2006 18:27:09 -0700 Subject: [NETFILTER]: Add SIP connection tracking helper Add SIP connection tracking helper. Originally written by Christian Hentschel , some cleanup, minor fixes and bidirectional SIP support added by myself. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack_sip.h | 44 +++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 include/linux/netfilter_ipv4/ip_conntrack_sip.h (limited to 'include') diff --git a/include/linux/netfilter_ipv4/ip_conntrack_sip.h b/include/linux/netfilter_ipv4/ip_conntrack_sip.h new file mode 100644 index 0000000000..913dad66c0 --- /dev/null +++ b/include/linux/netfilter_ipv4/ip_conntrack_sip.h @@ -0,0 +1,44 @@ +#ifndef __IP_CONNTRACK_SIP_H__ +#define __IP_CONNTRACK_SIP_H__ +#ifdef __KERNEL__ + +#define SIP_PORT 5060 +#define SIP_TIMEOUT 3600 + +#define POS_VIA 0 +#define POS_CONTACT 1 +#define POS_CONTENT 2 +#define POS_MEDIA 3 +#define POS_OWNER 4 +#define POS_CONNECTION 5 +#define POS_REQ_HEADER 6 +#define POS_SDP_HEADER 7 + +struct sip_header_nfo { + const char *lname; + const char *sname; + const char *ln_str; + size_t lnlen; + size_t snlen; + size_t ln_strlen; + int (*match_len)(const char *, const char *, int *); +}; + +extern unsigned int (*ip_nat_sip_hook)(struct sk_buff **pskb, + enum ip_conntrack_info ctinfo, + struct ip_conntrack *ct, + const char **dptr); +extern unsigned int (*ip_nat_sdp_hook)(struct sk_buff **pskb, + enum ip_conntrack_info ctinfo, + struct ip_conntrack_expect *exp, + const char *dptr); + +extern int ct_sip_get_info(const char *dptr, size_t dlen, + unsigned int *matchoff, + unsigned int *matchlen, + struct sip_header_nfo *hnfo); +extern int ct_sip_lnlen(const char *line, const char *limit); +extern const char *ct_sip_search(const char *needle, const char *haystack, + size_t needle_len, size_t haystack_len); +#endif /* __KERNEL__ */ +#endif /* __IP_CONNTRACK_SIP_H__ */ -- cgit v1.2.2 From 72dc5b9225c53310c010b68a70ea97c8c8e24bdf Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 5 Jun 2006 17:30:08 -0700 Subject: [TCP]: Minimum congestion window consolidation. Many of the TCP congestion methods all just use ssthresh as the minimum congestion window on decrease. Rather than duplicating the code, just have that be the default if that handle in the ops structure is not set. Minor behaviour change to TCP compound. It probably wants to use this (ssthresh) as lower bound, rather than ssthresh/2 because the latter causes undershoot on loss. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/tcp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index f1f472746e..de88c5472b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -632,7 +632,7 @@ struct tcp_congestion_ops { /* return slow start threshold (required) */ u32 (*ssthresh)(struct sock *sk); /* lower bound for congestion window (optional) */ - u32 (*min_cwnd)(struct sock *sk); + u32 (*min_cwnd)(const struct sock *sk); /* do new cwnd calculation (required) */ void (*cong_avoid)(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, int good_ack); @@ -667,7 +667,7 @@ extern struct tcp_congestion_ops tcp_init_congestion_ops; extern u32 tcp_reno_ssthresh(struct sock *sk); extern void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, int flag); -extern u32 tcp_reno_min_cwnd(struct sock *sk); +extern u32 tcp_reno_min_cwnd(const struct sock *sk); extern struct tcp_congestion_ops tcp_reno; static inline void tcp_set_ca_state(struct sock *sk, const u8 ca_state) -- cgit v1.2.2 From 338fcf9886df9ad2873772197a73a57818973316 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 5 Jun 2006 21:04:39 -0700 Subject: [IPV4] igmp: Fixup struct ip_mc_list::multiaddr type All users except two expect 32-bit big-endian value. One is of ->multiaddr = ->multiaddr variety. And last one is "%08lX". Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/linux/igmp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 28f4f3b369..899c3d4776 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -169,7 +169,7 @@ struct ip_sf_list struct ip_mc_list { struct in_device *interface; - unsigned long multiaddr; + __be32 multiaddr; struct ip_sf_list *sources; struct ip_sf_list *tomb; unsigned int sfmode; -- cgit v1.2.2 From 6d7416535097ed0943bdae8e69c14ba43061cab1 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 5 Jun 2006 21:06:41 -0700 Subject: [IPV4]: Right prototype of __raw_v4_lookup() All users pass 32-bit values as addresses and internally they're compared with 32-bit entities. So, change "laddr" and "raddr" types to __be32. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/raw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/raw.h b/include/net/raw.h index e67b28a024..d83571fe4c 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -36,7 +36,7 @@ extern rwlock_t raw_v4_lock; extern struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num, - unsigned long raddr, unsigned long laddr, + __be32 raddr, __be32 laddr, int dif); extern int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash); -- cgit v1.2.2 From c8c05a8eec6f1258f6d5cb71a44ee5dc1e989b63 Mon Sep 17 00:00:00 2001 From: Catherine Zhang Date: Thu, 8 Jun 2006 23:39:49 -0700 Subject: [LSM-IPsec]: SELinux Authorize This patch contains a fix for the previous patch that adds security contexts to IPsec policies and security associations. In the previous patch, no authorization (besides the check for write permissions to SAD and SPD) is required to delete IPsec policies and security assocations with security contexts. Thus a user authorized to change SAD and SPD can bypass the IPsec policy authorization by simply deleteing policies with security contexts. To fix this security hole, an additional authorization check is added for removing security policies and security associations with security contexts. Note that if no security context is supplied on add or present on policy to be deleted, the SELinux module allows the change unconditionally. The hook is called on deletion when no context is present, which we may want to change. At present, I left it up to the module. LSM changes: The patch adds two new LSM hooks: xfrm_policy_delete and xfrm_state_delete. The new hooks are necessary to authorize deletion of IPsec policies that have security contexts. The existing hooks xfrm_policy_free and xfrm_state_free lack the context to do the authorization, so I decided to split authorization of deletion and memory management of security data, as is typical in the LSM interface. Use: The new delete hooks are checked when xfrm_policy or xfrm_state are deleted by either the xfrm_user interface (xfrm_get_policy, xfrm_del_sa) or the pfkey interface (pfkey_spddelete, pfkey_delete). SELinux changes: The new policy_delete and state_delete functions are added. Signed-off-by: Catherine Zhang Signed-off-by: Trent Jaeger Acked-by: James Morris Signed-off-by: David S. Miller --- include/linux/security.h | 40 ++++++++++++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index 1bab48f6ae..14c9bd0506 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -805,31 +805,37 @@ struct swap_info_struct; * used by the XFRM system. * @sec_ctx contains the security context information being provided by * the user-level policy update program (e.g., setkey). - * Allocate a security structure to the xp->selector.security field. + * Allocate a security structure to the xp->security field. * The security field is initialized to NULL when the xfrm_policy is * allocated. * Return 0 if operation was successful (memory to allocate, legal context) * @xfrm_policy_clone_security: * @old contains an existing xfrm_policy in the SPD. * @new contains a new xfrm_policy being cloned from old. - * Allocate a security structure to the new->selector.security field - * that contains the information from the old->selector.security field. + * Allocate a security structure to the new->security field + * that contains the information from the old->security field. * Return 0 if operation was successful (memory to allocate). * @xfrm_policy_free_security: * @xp contains the xfrm_policy - * Deallocate xp->selector.security. + * Deallocate xp->security. + * @xfrm_policy_delete_security: + * @xp contains the xfrm_policy. + * Authorize deletion of xp->security. * @xfrm_state_alloc_security: * @x contains the xfrm_state being added to the Security Association * Database by the XFRM system. * @sec_ctx contains the security context information being provided by * the user-level SA generation program (e.g., setkey or racoon). - * Allocate a security structure to the x->sel.security field. The + * Allocate a security structure to the x->security field. The * security field is initialized to NULL when the xfrm_state is * allocated. * Return 0 if operation was successful (memory to allocate, legal context). * @xfrm_state_free_security: * @x contains the xfrm_state. - * Deallocate x>sel.security. + * Deallocate x->security. + * @xfrm_state_delete_security: + * @x contains the xfrm_state. + * Authorize deletion of x->security. * @xfrm_policy_lookup: * @xp contains the xfrm_policy for which the access control is being * checked. @@ -1298,8 +1304,10 @@ struct security_operations { int (*xfrm_policy_alloc_security) (struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx); int (*xfrm_policy_clone_security) (struct xfrm_policy *old, struct xfrm_policy *new); void (*xfrm_policy_free_security) (struct xfrm_policy *xp); + int (*xfrm_policy_delete_security) (struct xfrm_policy *xp); int (*xfrm_state_alloc_security) (struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx); void (*xfrm_state_free_security) (struct xfrm_state *x); + int (*xfrm_state_delete_security) (struct xfrm_state *x); int (*xfrm_policy_lookup)(struct xfrm_policy *xp, u32 sk_sid, u8 dir); #endif /* CONFIG_SECURITY_NETWORK_XFRM */ @@ -2934,11 +2942,21 @@ static inline void security_xfrm_policy_free(struct xfrm_policy *xp) security_ops->xfrm_policy_free_security(xp); } +static inline int security_xfrm_policy_delete(struct xfrm_policy *xp) +{ + return security_ops->xfrm_policy_delete_security(xp); +} + static inline int security_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx) { return security_ops->xfrm_state_alloc_security(x, sec_ctx); } +static inline int security_xfrm_state_delete(struct xfrm_state *x) +{ + return security_ops->xfrm_state_delete_security(x); +} + static inline void security_xfrm_state_free(struct xfrm_state *x) { security_ops->xfrm_state_free_security(x); @@ -2963,6 +2981,11 @@ static inline void security_xfrm_policy_free(struct xfrm_policy *xp) { } +static inline int security_xfrm_policy_delete(struct xfrm_policy *xp) +{ + return 0; +} + static inline int security_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx) { return 0; @@ -2972,6 +2995,11 @@ static inline void security_xfrm_state_free(struct xfrm_state *x) { } +static inline int security_xfrm_state_delete(struct xfrm_policy *xp) +{ + return 0; +} + static inline int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir) { return 0; -- cgit v1.2.2 From 6f68dc37759b1d6ff3b4d4a9d097605a09f8f043 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 8 Jun 2006 23:58:52 -0700 Subject: [NET]: Fix warnings after LSM-IPSEC changes. Assignment used as truth value in xfrm_del_sa() and xfrm_get_policy(). Wrong argument type declared for security_xfrm_state_delete() when SELINUX is disabled. Signed-off-by: David S. Miller --- include/linux/security.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index 14c9bd0506..4dfb1b84a9 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -2995,7 +2995,7 @@ static inline void security_xfrm_state_free(struct xfrm_state *x) { } -static inline int security_xfrm_state_delete(struct xfrm_policy *xp) +static inline int security_xfrm_state_delete(struct xfrm_state *x) { return 0; } -- cgit v1.2.2 From c749b29fae74ed59c507d84025b3298202b42609 Mon Sep 17 00:00:00 2001 From: James Morris Date: Fri, 9 Jun 2006 00:28:25 -0700 Subject: [SECMARK]: Add SELinux exports Add and export new functions to the in-kernel SELinux API in support of the new secmark-based packet controls. Signed-off-by: James Morris Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- include/linux/selinux.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include') diff --git a/include/linux/selinux.h b/include/linux/selinux.h index 4047bcde44..aad4e390d6 100644 --- a/include/linux/selinux.h +++ b/include/linux/selinux.h @@ -118,6 +118,27 @@ void selinux_get_ipc_sid(const struct kern_ipc_perm *ipcp, u32 *sid); */ void selinux_get_task_sid(struct task_struct *tsk, u32 *sid); +/** + * selinux_string_to_sid - map a security context string to a security ID + * @str: the security context string to be mapped + * @sid: ID value returned via this. + * + * Returns 0 if successful, with the SID stored in sid. A value + * of zero for sid indicates no SID could be determined (but no error + * occurred). + */ +int selinux_string_to_sid(char *str, u32 *sid); + +/** + * selinux_relabel_packet_permission - check permission to relabel a packet + * @sid: ID value to be applied to network packet (via SECMARK, most likely) + * + * Returns 0 if the current task is allowed to label packets with the + * supplied security ID. Note that it is implicit that the packet is always + * being relabeled from the default unlabled value, and that the access + * control decision is made in the AVC. + */ +int selinux_relabel_packet_permission(u32 sid); #else @@ -172,6 +193,17 @@ static inline void selinux_get_task_sid(struct task_struct *tsk, u32 *sid) *sid = 0; } +static inline int selinux_string_to_sid(const char *str, u32 *sid) +{ + *sid = 0; + return 0; +} + +static inline int selinux_relabel_packet_permission(u32 sid) +{ + return 0; +} + #endif /* CONFIG_SECURITY_SELINUX */ #endif /* _LINUX_SELINUX_H */ -- cgit v1.2.2 From 984bc16cc92ea3c247bf34ad667cfb95331b9d3c Mon Sep 17 00:00:00 2001 From: James Morris Date: Fri, 9 Jun 2006 00:29:17 -0700 Subject: [SECMARK]: Add secmark support to core networking. Add a secmark field to the skbuff structure, to allow security subsystems to place security markings on network packets. This is similar to the nfmark field, except is intended for implementing security policy, rather than than networking policy. This patch was already acked in principle by Dave Miller. Signed-off-by: James Morris Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- include/linux/skbuff.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 23bad3bf3c..fe2c58e530 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -210,6 +210,7 @@ enum { * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c * @tc_index: Traffic control index * @tc_verd: traffic control verdict + * @secmark: security marking */ struct sk_buff { @@ -289,6 +290,9 @@ struct sk_buff { #ifdef CONFIG_NET_DMA dma_cookie_t dma_cookie; #endif +#ifdef CONFIG_NETWORK_SECMARK + __u32 secmark; +#endif /* These elements must be at the end, see alloc_skb() for details. */ @@ -1400,5 +1404,23 @@ static inline void nf_reset(struct sk_buff *skb) static inline void nf_reset(struct sk_buff *skb) {} #endif /* CONFIG_NETFILTER */ +#ifdef CONFIG_NETWORK_SECMARK +static inline void skb_copy_secmark(struct sk_buff *to, const struct sk_buff *from) +{ + to->secmark = from->secmark; +} + +static inline void skb_init_secmark(struct sk_buff *skb) +{ + skb->secmark = 0; +} +#else +static inline void skb_copy_secmark(struct sk_buff *to, const struct sk_buff *from) +{ } + +static inline void skb_init_secmark(struct sk_buff *skb) +{ } +#endif + #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */ -- cgit v1.2.2 From 5e6874cdb8de94cd3c15d853a8ef9c6f4c305055 Mon Sep 17 00:00:00 2001 From: James Morris Date: Fri, 9 Jun 2006 00:30:57 -0700 Subject: [SECMARK]: Add xtables SECMARK target Add a SECMARK target to xtables, allowing the admin to apply security marks to packets via both iptables and ip6tables. The target currently handles SELinux security marking, but can be extended for other purposes as needed. Signed-off-by: James Morris Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- include/linux/netfilter/xt_SECMARK.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 include/linux/netfilter/xt_SECMARK.h (limited to 'include') diff --git a/include/linux/netfilter/xt_SECMARK.h b/include/linux/netfilter/xt_SECMARK.h new file mode 100644 index 0000000000..c53fbffa99 --- /dev/null +++ b/include/linux/netfilter/xt_SECMARK.h @@ -0,0 +1,26 @@ +#ifndef _XT_SECMARK_H_target +#define _XT_SECMARK_H_target + +/* + * This is intended for use by various security subsystems (but not + * at the same time). + * + * 'mode' refers to the specific security subsystem which the + * packets are being marked for. + */ +#define SECMARK_MODE_SEL 0x01 /* SELinux */ +#define SECMARK_SELCTX_MAX 256 + +struct xt_secmark_target_selinux_info { + u_int32_t selsid; + char selctx[SECMARK_SELCTX_MAX]; +}; + +struct xt_secmark_target_info { + u_int8_t mode; + union { + struct xt_secmark_target_selinux_info sel; + } u; +}; + +#endif /*_XT_SECMARK_H_target */ -- cgit v1.2.2 From 7c9728c393dceb724d66d696cfabce82151a78e5 Mon Sep 17 00:00:00 2001 From: James Morris Date: Fri, 9 Jun 2006 00:31:46 -0700 Subject: [SECMARK]: Add secmark support to conntrack Add a secmark field to IP and NF conntracks, so that security markings on packets can be copied to their associated connections, and also copied back to packets as required. This is similar to the network mark field currently used with conntrack, although it is intended for enforcement of security policy rather than network policy. Signed-off-by: James Morris Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack.h | 4 ++++ include/net/netfilter/nf_conntrack.h | 4 ++++ include/net/netfilter/nf_conntrack_compat.h | 26 ++++++++++++++++++++++++++ 3 files changed, 34 insertions(+) (limited to 'include') diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h index 17d7ef938a..e0e9951eb8 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h @@ -121,6 +121,10 @@ struct ip_conntrack u_int32_t mark; #endif +#ifdef CONFIG_IP_NF_CONNTRACK_SECMARK + u_int32_t secmark; +#endif + /* Traversed often, so hopefully in different cacheline to top */ /* These are my tuples; original and reply */ struct ip_conntrack_tuple_hash tuplehash[IP_CT_DIR_MAX]; diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index dbe7a114d0..4111178158 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -114,6 +114,10 @@ struct nf_conn u_int32_t mark; #endif +#ifdef CONFIG_NF_CONNTRACK_SECMARK + u_int32_t secmark; +#endif + /* Storage reserved for other modules: */ union nf_conntrack_proto proto; diff --git a/include/net/netfilter/nf_conntrack_compat.h b/include/net/netfilter/nf_conntrack_compat.h index 3cac19fb36..f1b1482d72 100644 --- a/include/net/netfilter/nf_conntrack_compat.h +++ b/include/net/netfilter/nf_conntrack_compat.h @@ -20,6 +20,19 @@ static inline u_int32_t *nf_ct_get_mark(const struct sk_buff *skb, } #endif /* CONFIG_IP_NF_CONNTRACK_MARK */ +#ifdef CONFIG_IP_NF_CONNTRACK_SECMARK +static inline u_int32_t *nf_ct_get_secmark(const struct sk_buff *skb, + u_int32_t *ctinfo) +{ + struct ip_conntrack *ct = ip_conntrack_get(skb, ctinfo); + + if (ct) + return &ct->secmark; + else + return NULL; +} +#endif /* CONFIG_IP_NF_CONNTRACK_SECMARK */ + #ifdef CONFIG_IP_NF_CT_ACCT static inline struct ip_conntrack_counter * nf_ct_get_counters(const struct sk_buff *skb) @@ -70,6 +83,19 @@ static inline u_int32_t *nf_ct_get_mark(const struct sk_buff *skb, } #endif /* CONFIG_NF_CONNTRACK_MARK */ +#ifdef CONFIG_NF_CONNTRACK_SECMARK +static inline u_int32_t *nf_ct_get_secmark(const struct sk_buff *skb, + u_int32_t *ctinfo) +{ + struct nf_conn *ct = nf_ct_get(skb, ctinfo); + + if (ct) + return &ct->secmark; + else + return NULL; +} +#endif /* CONFIG_NF_CONNTRACK_MARK */ + #ifdef CONFIG_NF_CT_ACCT static inline struct ip_conntrack_counter * nf_ct_get_counters(const struct sk_buff *skb) -- cgit v1.2.2 From 100468e9c05c10fb6872751c1af523b996d6afa9 Mon Sep 17 00:00:00 2001 From: James Morris Date: Fri, 9 Jun 2006 00:32:39 -0700 Subject: [SECMARK]: Add CONNSECMARK xtables target Add a new xtables target, CONNSECMARK, which is used to specify rules for copying security marks from packets to connections, and for copyying security marks back from connections to packets. This is similar to the CONNMARK target, but is more limited in scope in that it only allows copying of security marks to and from packets, as this is all it needs to do. A typical scenario would be to apply a security mark to a 'new' packet with SECMARK, then copy that to its conntrack via CONNMARK, and then restore the security mark from the connection to established and related packets on that connection. Signed-off-by: James Morris Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- include/linux/netfilter/xt_CONNSECMARK.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 include/linux/netfilter/xt_CONNSECMARK.h (limited to 'include') diff --git a/include/linux/netfilter/xt_CONNSECMARK.h b/include/linux/netfilter/xt_CONNSECMARK.h new file mode 100644 index 0000000000..c6bd75469b --- /dev/null +++ b/include/linux/netfilter/xt_CONNSECMARK.h @@ -0,0 +1,13 @@ +#ifndef _XT_CONNSECMARK_H_target +#define _XT_CONNSECMARK_H_target + +enum { + CONNSECMARK_SAVE = 1, + CONNSECMARK_RESTORE, +}; + +struct xt_connsecmark_target_info { + u_int8_t mode; +}; + +#endif /*_XT_CONNSECMARK_H_target */ -- cgit v1.2.2 From 932ff279a43ab7257942cddff2595acd541cc49b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 9 Jun 2006 12:20:56 -0700 Subject: [NET]: Add netif_tx_lock Various drivers use xmit_lock internally to synchronise with their transmission routines. They do so without setting xmit_lock_owner. This is fine as long as netpoll is not in use. With netpoll it is possible for deadlocks to occur if xmit_lock_owner isn't set. This is because if a printk occurs while xmit_lock is held and xmit_lock_owner is not set can cause netpoll to attempt to take xmit_lock recursively. While it is possible to resolve this by getting netpoll to use trylock, it is suboptimal because netpoll's sole objective is to maximise the chance of getting the printk out on the wire. So delaying or dropping the message is to be avoided as much as possible. So the only alternative is to always set xmit_lock_owner. The following patch does this by introducing the netif_tx_lock family of functions that take care of setting/unsetting xmit_lock_owner. I renamed xmit_lock to _xmit_lock to indicate that it should not be used directly. I didn't provide irq versions of the netif_tx_lock functions since xmit_lock is meant to be a BH-disabling lock. This is pretty much a straight text substitution except for a small bug fix in winbond. It currently uses netif_stop_queue/spin_unlock_wait to stop transmission. This is unsafe as an IRQ can potentially wake up the queue. So it is safer to use netif_tx_disable. The hamradio bits used spin_lock_irq but it is unnecessary as xmit_lock must never be taken in an IRQ handler. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 38 +++++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b5760c67af..067b9ccafd 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -407,7 +407,7 @@ struct net_device * One part is mostly used on xmit path (device) */ /* hard_start_xmit synchronizer */ - spinlock_t xmit_lock ____cacheline_aligned_in_smp; + spinlock_t _xmit_lock ____cacheline_aligned_in_smp; /* cpu id of processor entered to hard_start_xmit or -1, if nobody entered there. */ @@ -893,11 +893,43 @@ static inline void __netif_rx_complete(struct net_device *dev) clear_bit(__LINK_STATE_RX_SCHED, &dev->state); } +static inline void netif_tx_lock(struct net_device *dev) +{ + spin_lock(&dev->_xmit_lock); + dev->xmit_lock_owner = smp_processor_id(); +} + +static inline void netif_tx_lock_bh(struct net_device *dev) +{ + spin_lock_bh(&dev->_xmit_lock); + dev->xmit_lock_owner = smp_processor_id(); +} + +static inline int netif_tx_trylock(struct net_device *dev) +{ + int err = spin_trylock(&dev->_xmit_lock); + if (!err) + dev->xmit_lock_owner = smp_processor_id(); + return err; +} + +static inline void netif_tx_unlock(struct net_device *dev) +{ + dev->xmit_lock_owner = -1; + spin_unlock(&dev->_xmit_lock); +} + +static inline void netif_tx_unlock_bh(struct net_device *dev) +{ + dev->xmit_lock_owner = -1; + spin_unlock_bh(&dev->_xmit_lock); +} + static inline void netif_tx_disable(struct net_device *dev) { - spin_lock_bh(&dev->xmit_lock); + netif_tx_lock_bh(dev); netif_stop_queue(dev); - spin_unlock_bh(&dev->xmit_lock); + netif_tx_unlock_bh(dev); } /* These functions live elsewhere (drivers/net/net_init.c, but related) */ -- cgit v1.2.2 From 364c6badde0dd62a0a38e5ed67f85d87d6665780 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 9 Jun 2006 16:10:40 -0700 Subject: [NET]: Clean up skb_linearize The linearisation operation doesn't need to be super-optimised. So we can replace __skb_linearize with __pskb_pull_tail which does the same thing but is more general. Also, most users of skb_linearize end up testing whether the skb is linear or not so it helps to make skb_linearize do just that. Some callers of skb_linearize also use it to copy cloned data, so it's useful to have a new function skb_linearize_cow to copy the data if it's either non-linear or cloned. Last but not least, I've removed the gfp argument since nobody uses it anymore. If it's ever needed we can easily add it back. Misc bugs fixed by this patch: * via-velocity error handling (also, no SG => no frags) Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/skbuff.h | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index fe2c58e530..830f58fa03 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1169,18 +1169,34 @@ static inline int skb_can_coalesce(struct sk_buff *skb, int i, return 0; } +static inline int __skb_linearize(struct sk_buff *skb) +{ + return __pskb_pull_tail(skb, skb->data_len) ? 0 : -ENOMEM; +} + /** * skb_linearize - convert paged skb to linear one * @skb: buffer to linarize - * @gfp: allocation mode * * If there is no free memory -ENOMEM is returned, otherwise zero * is returned and the old skb data released. */ -extern int __skb_linearize(struct sk_buff *skb, gfp_t gfp); -static inline int skb_linearize(struct sk_buff *skb, gfp_t gfp) +static inline int skb_linearize(struct sk_buff *skb) +{ + return skb_is_nonlinear(skb) ? __skb_linearize(skb) : 0; +} + +/** + * skb_linearize_cow - make sure skb is linear and writable + * @skb: buffer to process + * + * If there is no free memory -ENOMEM is returned, otherwise zero + * is returned and the old skb data released. + */ +static inline int skb_linearize_cow(struct sk_buff *skb) { - return __skb_linearize(skb, gfp); + return skb_is_nonlinear(skb) || skb_cloned(skb) ? + __skb_linearize(skb) : 0; } /** -- cgit v1.2.2 From b38dfee3d616ffadb58d4215e3ff9d1d7921031e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 9 Jun 2006 16:13:01 -0700 Subject: [NET]: skb_trim audit I found a few more spots where pskb_trim_rcsum could be used but were not. This patch changes them to use it. Also, sk_filter can get paged skb data. Therefore we must use pskb_trim instead of skb_trim. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/sock.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 75b0e97ed9..96565ff0de 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -873,10 +873,7 @@ static inline int sk_filter(struct sock *sk, struct sk_buff *skb, int needlock) if (filter) { unsigned int pkt_len = sk_run_filter(skb, filter->insns, filter->len); - if (!pkt_len) - err = -EPERM; - else - skb_trim(skb, pkt_len); + err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; } if (needlock) -- cgit v1.2.2 From 3cc0e873986fe594d0e96d07259b11f755325cb2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 9 Jun 2006 16:13:38 -0700 Subject: [NET]: Warn in __skb_trim if skb is paged It's better to warn and fail rather than rarely triggering BUG on paths that incorrectly call skb_trim/__skb_trim on a non-linear skb. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/skbuff.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 830f58fa03..93e4db2215 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -975,15 +975,16 @@ static inline void skb_reserve(struct sk_buff *skb, int len) #define NET_SKB_PAD 16 #endif -extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc); +extern int ___pskb_trim(struct sk_buff *skb, unsigned int len); static inline void __skb_trim(struct sk_buff *skb, unsigned int len) { - if (!skb->data_len) { - skb->len = len; - skb->tail = skb->data + len; - } else - ___pskb_trim(skb, len, 0); + if (unlikely(skb->data_len)) { + WARN_ON(1); + return; + } + skb->len = len; + skb->tail = skb->data + len; } /** @@ -993,6 +994,7 @@ static inline void __skb_trim(struct sk_buff *skb, unsigned int len) * * Cut the length of a buffer down by removing data from the tail. If * the buffer is already under the length specified it is not modified. + * The skb must be linear. */ static inline void skb_trim(struct sk_buff *skb, unsigned int len) { @@ -1003,12 +1005,10 @@ static inline void skb_trim(struct sk_buff *skb, unsigned int len) static inline int __pskb_trim(struct sk_buff *skb, unsigned int len) { - if (!skb->data_len) { - skb->len = len; - skb->tail = skb->data+len; - return 0; - } - return ___pskb_trim(skb, len, 1); + if (skb->data_len) + return ___pskb_trim(skb, len); + __skb_trim(skb, len); + return 0; } static inline int pskb_trim(struct sk_buff *skb, unsigned int len) -- cgit v1.2.2 From bdeb04c6d9a957ae2a51c3033414467b82b2a736 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sun, 11 Jun 2006 21:20:38 -0700 Subject: [NET]: net.ipv4.ip_autoconfig sysctl removal The sysctl net.ipv4.ip_autoconfig is a legacy value that is not used. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/ip.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 3d2e5ca62a..ead233c954 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -147,7 +147,6 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar struct ipv4_config { int log_martians; - int autoconfig; int no_pmtu_disc; }; -- cgit v1.2.2 From 35089bb203f44e33b6bbb6c4de0b0708f9a48921 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 13 Jun 2006 22:33:04 -0700 Subject: [TCP]: Add tcp_slow_start_after_idle sysctl. A lot of people have asked for a way to disable tcp_cwnd_restart(), and it seems reasonable to add a sysctl to do that. Signed-off-by: David S. Miller --- include/linux/sysctl.h | 1 + include/net/tcp.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 98338ed2c0..cee944dbdc 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -405,6 +405,7 @@ enum NET_TCP_BASE_MSS=114, NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS=115, NET_TCP_DMA_COPYBREAK=116, + NET_TCP_SLOW_START_AFTER_IDLE=117, }; enum { diff --git a/include/net/tcp.h b/include/net/tcp.h index de88c5472b..bfc71f954b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -227,6 +227,7 @@ extern int sysctl_tcp_abc; extern int sysctl_tcp_mtu_probing; extern int sysctl_tcp_base_mss; extern int sysctl_tcp_workaround_signed_windows; +extern int sysctl_tcp_slow_start_after_idle; extern atomic_t tcp_memory_allocated; extern atomic_t tcp_sockets_allocated; -- cgit v1.2.2 From 8648b3053bff39a7ee4c711d74268079c928a657 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 17 Jun 2006 22:06:05 -0700 Subject: [NET]: Add NETIF_F_GEN_CSUM and NETIF_F_ALL_CSUM The current stack treats NETIF_F_HW_CSUM and NETIF_F_NO_CSUM identically so we test for them in quite a few places. For the sake of brevity, I'm adding the macro NETIF_F_GEN_CSUM for these two. We also test the disjunct of NETIF_F_IP_CSUM and the other two in various places, for that purpose I've added NETIF_F_ALL_CSUM. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 067b9ccafd..e432b743dd 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -312,6 +312,9 @@ struct net_device #define NETIF_F_LLTX 4096 /* LockLess TX */ #define NETIF_F_UFO 8192 /* Can offload UDP Large Send*/ +#define NETIF_F_GEN_CSUM (NETIF_F_NO_CSUM | NETIF_F_HW_CSUM) +#define NETIF_F_ALL_CSUM (NETIF_F_IP_CSUM | NETIF_F_GEN_CSUM) + struct net_device *next_sched; /* Interface index. Unique device identifier */ -- cgit v1.2.2 From c7ce1ae21223fe1f905feba272bc14b87994a57d Mon Sep 17 00:00:00 2001 From: Tushar Gohad Date: Sat, 17 Jun 2006 22:54:03 -0700 Subject: [PFKEYV2]: Fix inconsistent typing in struct sadb_x_kmprivate. Signed-off-by: Tushar Gohad Signed-off-by: David S. Miller --- include/linux/pfkeyv2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pfkeyv2.h b/include/linux/pfkeyv2.h index bac0fb389c..d5dd471da2 100644 --- a/include/linux/pfkeyv2.h +++ b/include/linux/pfkeyv2.h @@ -159,7 +159,7 @@ struct sadb_spirange { struct sadb_x_kmprivate { uint16_t sadb_x_kmprivate_len; uint16_t sadb_x_kmprivate_exttype; - u_int32_t sadb_x_kmprivate_reserved; + uint32_t sadb_x_kmprivate_reserved; } __attribute__((packed)); /* sizeof(struct sadb_x_kmprivate) == 8 */ -- cgit v1.2.2 From 5636bef7324f49e36f05ec8a5f6284e11b1bcca4 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Sat, 17 Jun 2006 22:55:35 -0700 Subject: [SCTP]: Reject sctp packets with broadcast addresses. Signed-off-by: Vlad Yasevich Signed-off-by: Sridhar Samudrala Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 7f4fea173f..5f69158c10 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -555,7 +555,8 @@ struct sctp_af { int (*to_addr_param) (const union sctp_addr *, union sctp_addr_param *); int (*addr_valid) (union sctp_addr *, - struct sctp_sock *); + struct sctp_sock *, + const struct sk_buff *); sctp_scope_t (*scope) (union sctp_addr *); void (*inaddr_any) (union sctp_addr *, unsigned short); int (*is_any) (const union sctp_addr *); -- cgit v1.2.2 From 4c9f5d5305a23851e67471b147e0d459a7166717 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Sat, 17 Jun 2006 22:56:08 -0700 Subject: [SCTP] Reset rtt_in_progress for the chunk when processing its sack. Signed-off-by: Vlad Yasevich Signed-off-by: Sridhar Samudrala Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index aa6033ca7c..b2b40f951a 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -255,7 +255,7 @@ extern int sctp_debug_flag; #define SCTP_DEBUG_PRINTK_IPADDR(whatever...) #define SCTP_ENABLE_DEBUG #define SCTP_DISABLE_DEBUG -#define SCTP_ASSERT(expr, str, func) +#define SCTP_ASSERT(expr, str, func) BUG_ON(!(expr)) #endif /* SCTP_DEBUG */ -- cgit v1.2.2