author    Ingo Molnar <mingo@elte.hu>  2008-10-27 07:38:02 -0400
committer Ingo Molnar <mingo@elte.hu>  2008-10-27 07:38:02 -0400
commit    5292ae11babca23c3ff82593630d2d7eebc350a9 (patch)
tree      30a6c8123b35686098f306ea39398b7621f42054 /net
parent    b0f209898f1a177bd503d49215b8c6628797a81c (diff)
parent    0173a3265b228da319ceb9c1ec6a5682fd1b2d92 (diff)

Merge commit 'v2.6.28-rc2' into x86/uv
Diffstat (limited to 'net')
-rw-r--r--  net/9p/Kconfig               6
-rw-r--r--  net/9p/Makefile              4
-rw-r--r--  net/9p/client.c              1
-rw-r--r--  net/9p/protocol.c           33
-rw-r--r--  net/9p/trans_fd.c            4
-rw-r--r--  net/9p/trans_rdma.c        712
-rw-r--r--  net/bridge/br_device.c       2
-rw-r--r--  net/bridge/br_if.c          14
-rw-r--r--  net/core/dev.c             135
-rw-r--r--  net/ipv4/tcp_output.c       25
-rw-r--r--  net/sched/sch_cbq.c          7
-rw-r--r--  net/sctp/input.c             2
-rw-r--r--  net/sctp/sm_statefuns.c     54
-rw-r--r--  net/sctp/sm_statetable.c     4
-rw-r--r--  net/unix/af_unix.c          18
-rw-r--r--  net/wireless/Kconfig        11
16 files changed, 921 insertions, 111 deletions
diff --git a/net/9p/Kconfig b/net/9p/Kconfig
index ff34c5acc130..c42c0c400bf9 100644
--- a/net/9p/Kconfig
+++ b/net/9p/Kconfig
@@ -20,6 +20,12 @@ config NET_9P_VIRTIO
 	  This builds support for a transports between
 	  guest partitions and a host partition.
 
+config NET_9P_RDMA
+	depends on NET_9P && INFINIBAND && EXPERIMENTAL
+	tristate "9P RDMA Transport (Experimental)"
+	help
+	  This builds support for a RDMA transport.
+
 config NET_9P_DEBUG
 	bool "Debug information"
 	depends on NET_9P
diff --git a/net/9p/Makefile b/net/9p/Makefile
index 1041b7bd12e2..198a640d53a6 100644
--- a/net/9p/Makefile
+++ b/net/9p/Makefile
@@ -1,5 +1,6 @@
 obj-$(CONFIG_NET_9P) := 9pnet.o
 obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o
+obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o
 
 9pnet-objs := \
 	mod.o \
@@ -11,3 +12,6 @@ obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o
 
 9pnet_virtio-objs := \
 	trans_virtio.o \
+
+9pnet_rdma-objs := \
+	trans_rdma.o \
diff --git a/net/9p/client.c b/net/9p/client.c
index bbac2f72b4d2..67717f69412e 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -159,6 +159,7 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag)
 
 	if (!c->reqs[row]) {
 		printk(KERN_ERR "Couldn't grow tag array\n");
+		spin_unlock_irqrestore(&c->lock, flags);
 		return ERR_PTR(-ENOMEM);
 	}
 	for (col = 0; col < P9_ROW_MAXTAG; col++) {
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 29be52439086..dcd7666824ba 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -53,6 +53,7 @@
 static int
 p9pdu_writef(struct p9_fcall *pdu, int optional, const char *fmt, ...);
 
+#ifdef CONFIG_NET_9P_DEBUG
 void
 p9pdu_dump(int way, struct p9_fcall *pdu)
 {
@@ -81,6 +82,12 @@ p9pdu_dump(int way, struct p9_fcall *pdu)
 	else
 		P9_DPRINTK(P9_DEBUG_PKT, "]]](%d) %s\n", datalen, buf);
 }
+#else
+void
+p9pdu_dump(int way, struct p9_fcall *pdu)
+{
+}
+#endif
 EXPORT_SYMBOL(p9pdu_dump);
 
 void p9stat_free(struct p9_wstat *stbuf)
@@ -179,7 +186,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
 			}
 			break;
 		case 's':{
-				char **ptr = va_arg(ap, char **);
+				char **sptr = va_arg(ap, char **);
 				int16_t len;
 				int size;
 
@@ -189,17 +196,17 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
 
 				size = MAX(len, 0);
 
-				*ptr = kmalloc(size + 1, GFP_KERNEL);
-				if (*ptr == NULL) {
+				*sptr = kmalloc(size + 1, GFP_KERNEL);
+				if (*sptr == NULL) {
 					errcode = -EFAULT;
 					break;
 				}
-				if (pdu_read(pdu, *ptr, size)) {
+				if (pdu_read(pdu, *sptr, size)) {
 					errcode = -EFAULT;
-					kfree(*ptr);
-					*ptr = NULL;
+					kfree(*sptr);
+					*sptr = NULL;
 				} else
-					(*ptr)[size] = 0;
+					(*sptr)[size] = 0;
 			}
 			break;
 		case 'Q':{
@@ -373,13 +380,13 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
 			}
 			break;
 		case 's':{
-				const char *ptr = va_arg(ap, const char *);
+				const char *sptr = va_arg(ap, const char *);
 				int16_t len = 0;
-				if (ptr)
-					len = MIN(strlen(ptr), USHORT_MAX);
+				if (sptr)
+					len = MIN(strlen(sptr), USHORT_MAX);
 
 				errcode = p9pdu_writef(pdu, optional, "w", len);
-				if (!errcode && pdu_write(pdu, ptr, len))
+				if (!errcode && pdu_write(pdu, sptr, len))
 					errcode = -EFAULT;
 			}
 			break;
@@ -419,7 +426,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
 		case 'U':{
 				int32_t count = va_arg(ap, int32_t);
 				const char __user *udata =
-						va_arg(ap, const void *);
+						va_arg(ap, const void __user *);
 				errcode =
 				    p9pdu_writef(pdu, optional, "d", count);
 				if (!errcode && pdu_write_u(pdu, udata, count))
@@ -542,8 +549,10 @@ int p9pdu_finalize(struct p9_fcall *pdu)
 	err = p9pdu_writef(pdu, 0, "d", size);
 	pdu->size = size;
 
+#ifdef CONFIG_NET_9P_DEBUG
 	if ((p9_debug_level & P9_DEBUG_PKT) == P9_DEBUG_PKT)
 		p9pdu_dump(0, pdu);
+#endif
 
 	P9_DPRINTK(P9_DEBUG_9P, ">>> size=%d type: %d tag: %d\n", pdu->size,
 		   pdu->id, pdu->tag);
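
The #ifdef/#else pair added above compiles p9pdu_dump() down to an empty stub when NET_9P_DEBUG is off, while EXPORT_SYMBOL() stays outside the conditional so the symbol exists in both configurations. A minimal sketch of the same compile-out pattern, with CONFIG_EXAMPLE_DEBUG standing in for the real Kconfig macro:

#include <stdio.h>

#ifdef CONFIG_EXAMPLE_DEBUG
static void dump(const unsigned char *buf, int len)
{
	int i;

	for (i = 0; i < len; i++)
		printf("%02x ", buf[i]);
	printf("\n");
}
#else
/* Debug off: same signature, zero work, call sites stay unchanged. */
static void dump(const unsigned char *buf, int len)
{
}
#endif

int main(void)
{
	/* Prints only when built with -DCONFIG_EXAMPLE_DEBUG. */
	dump((const unsigned char *)"\x01\x02", 2);
	return 0;
}
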
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index be65d8242fd2..1df0356f242b 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -678,11 +678,9 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
 
 static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req)
 {
-	struct p9_trans_fd *ts = client->trans;
-	struct p9_conn *m = ts->conn;
 	int ret = 1;
 
-	P9_DPRINTK(P9_DEBUG_TRANS, "mux %p req %p\n", m, req);
+	P9_DPRINTK(P9_DEBUG_TRANS, "client %p req %p\n", client, req);
 
 	spin_lock(&client->lock);
 	list_del(&req->req_list);
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
new file mode 100644
index 000000000000..8d6cc4777aae
--- /dev/null
+++ b/net/9p/trans_rdma.c
@@ -0,0 +1,712 @@
+/*
+ * linux/fs/9p/trans_rdma.c
+ *
+ * RDMA transport layer based on the trans_fd.c implementation.
+ *
+ *  Copyright (C) 2008 by Tom Tucker <tom@opengridcomputing.com>
+ *  Copyright (C) 2006 by Russ Cox <rsc@swtch.com>
+ *  Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net>
+ *  Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com>
+ *  Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+#include <linux/in.h>
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/ipv6.h>
+#include <linux/kthread.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/un.h>
+#include <linux/uaccess.h>
+#include <linux/inet.h>
+#include <linux/idr.h>
+#include <linux/file.h>
+#include <linux/parser.h>
+#include <linux/semaphore.h>
+#include <net/9p/9p.h>
+#include <net/9p/client.h>
+#include <net/9p/transport.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/rdma_cm.h>
+#include <rdma/ib_verbs.h>
+
+#define P9_PORT			5640
+#define P9_RDMA_SQ_DEPTH	32
+#define P9_RDMA_RQ_DEPTH	32
+#define P9_RDMA_SEND_SGE	4
+#define P9_RDMA_RECV_SGE	4
+#define P9_RDMA_IRD		0
+#define P9_RDMA_ORD		0
+#define P9_RDMA_TIMEOUT		30000		/* 30 seconds */
+#define P9_RDMA_MAXSIZE		(4*4096)	/* Min SGE is 4, so we can
+						 * safely advertise a maxsize
+						 * of 64k */
+
+#define P9_RDMA_MAX_SGE (P9_RDMA_MAXSIZE >> PAGE_SHIFT)
+/**
+ * struct p9_trans_rdma - RDMA transport instance
+ *
+ * @state: tracks the transport state machine for connection setup and tear down
+ * @cm_id: The RDMA CM ID
+ * @pd: Protection Domain pointer
+ * @qp: Queue Pair pointer
+ * @cq: Completion Queue pointer
+ * @lkey: The local access only memory region key
+ * @timeout: Number of uSecs to wait for connection management events
+ * @sq_depth: The depth of the Send Queue
+ * @sq_sem: Semaphore for the SQ
+ * @rq_depth: The depth of the Receive Queue.
+ * @addr: The remote peer's address
+ * @req_lock: Protects the active request list
+ * @send_wait: Wait list when the SQ fills up
+ * @cm_done: Completion event for connection management tracking
+ */
+struct p9_trans_rdma {
+	enum {
+		P9_RDMA_INIT,
+		P9_RDMA_ADDR_RESOLVED,
+		P9_RDMA_ROUTE_RESOLVED,
+		P9_RDMA_CONNECTED,
+		P9_RDMA_FLUSHING,
+		P9_RDMA_CLOSING,
+		P9_RDMA_CLOSED,
+	} state;
+	struct rdma_cm_id *cm_id;
+	struct ib_pd *pd;
+	struct ib_qp *qp;
+	struct ib_cq *cq;
+	struct ib_mr *dma_mr;
+	u32 lkey;
+	long timeout;
+	int sq_depth;
+	struct semaphore sq_sem;
+	int rq_depth;
+	atomic_t rq_count;
+	struct sockaddr_in addr;
+	spinlock_t req_lock;
+
+	struct completion cm_done;
+};
+
+/**
+ * p9_rdma_context - Keeps track of in-process WR
+ *
+ * @wc_op: The original WR op for when the CQE completes in error.
+ * @busa: Bus address to unmap when the WR completes
+ * @req: Keeps track of requests (send)
+ * @rc: Keepts track of replies (receive)
+ */
+struct p9_rdma_req;
+struct p9_rdma_context {
+	enum ib_wc_opcode wc_op;
+	dma_addr_t busa;
+	union {
+		struct p9_req_t *req;
+		struct p9_fcall *rc;
+	};
+};
+
+/**
+ * p9_rdma_opts - Collection of mount options
+ * @port: port of connection
+ * @sq_depth: The requested depth of the SQ. This really doesn't need
+ * to be any deeper than the number of threads used in the client
+ * @rq_depth: The depth of the RQ. Should be greater than or equal to SQ depth
+ * @timeout: Time to wait in msecs for CM events
+ */
+struct p9_rdma_opts {
+	short port;
+	int sq_depth;
+	int rq_depth;
+	long timeout;
+};
+
+/*
+ * Option Parsing (code inspired by NFS code)
+ */
+enum {
+	/* Options that take integer arguments */
+	Opt_port, Opt_rq_depth, Opt_sq_depth, Opt_timeout, Opt_err,
+};
+
+static match_table_t tokens = {
+	{Opt_port, "port=%u"},
+	{Opt_sq_depth, "sq=%u"},
+	{Opt_rq_depth, "rq=%u"},
+	{Opt_timeout, "timeout=%u"},
+	{Opt_err, NULL},
+};
+
+/**
+ * parse_options - parse mount options into session structure
+ * @options: options string passed from mount
+ * @opts: transport-specific structure to parse options into
+ *
+ * Returns 0 upon success, -ERRNO upon failure
+ */
+static int parse_opts(char *params, struct p9_rdma_opts *opts)
+{
+	char *p;
+	substring_t args[MAX_OPT_ARGS];
+	int option;
+	char *options;
+	int ret;
+
+	opts->port = P9_PORT;
+	opts->sq_depth = P9_RDMA_SQ_DEPTH;
+	opts->rq_depth = P9_RDMA_RQ_DEPTH;
+	opts->timeout = P9_RDMA_TIMEOUT;
+
+	if (!params)
+		return 0;
+
+	options = kstrdup(params, GFP_KERNEL);
+	if (!options) {
+		P9_DPRINTK(P9_DEBUG_ERROR,
+			   "failed to allocate copy of option string\n");
+		return -ENOMEM;
+	}
+
+	while ((p = strsep(&options, ",")) != NULL) {
+		int token;
+		int r;
+		if (!*p)
+			continue;
+		token = match_token(p, tokens, args);
+		r = match_int(&args[0], &option);
+		if (r < 0) {
+			P9_DPRINTK(P9_DEBUG_ERROR,
+				   "integer field, but no integer?\n");
+			ret = r;
+			continue;
+		}
+		switch (token) {
+		case Opt_port:
+			opts->port = option;
+			break;
+		case Opt_sq_depth:
+			opts->sq_depth = option;
+			break;
+		case Opt_rq_depth:
+			opts->rq_depth = option;
+			break;
+		case Opt_timeout:
+			opts->timeout = option;
+			break;
+		default:
+			continue;
+		}
+	}
+	/* RQ must be at least as large as the SQ */
+	opts->rq_depth = max(opts->rq_depth, opts->sq_depth);
+	kfree(options);
+	return 0;
+}
+
+static int
+p9_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
+{
+	struct p9_client *c = id->context;
+	struct p9_trans_rdma *rdma = c->trans;
+	switch (event->event) {
+	case RDMA_CM_EVENT_ADDR_RESOLVED:
+		BUG_ON(rdma->state != P9_RDMA_INIT);
+		rdma->state = P9_RDMA_ADDR_RESOLVED;
+		break;
+
+	case RDMA_CM_EVENT_ROUTE_RESOLVED:
+		BUG_ON(rdma->state != P9_RDMA_ADDR_RESOLVED);
+		rdma->state = P9_RDMA_ROUTE_RESOLVED;
+		break;
+
+	case RDMA_CM_EVENT_ESTABLISHED:
+		BUG_ON(rdma->state != P9_RDMA_ROUTE_RESOLVED);
+		rdma->state = P9_RDMA_CONNECTED;
+		break;
+
+	case RDMA_CM_EVENT_DISCONNECTED:
+		if (rdma)
+			rdma->state = P9_RDMA_CLOSED;
+		if (c)
+			c->status = Disconnected;
+		break;
+
+	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
+		break;
+
+	case RDMA_CM_EVENT_ADDR_CHANGE:
+	case RDMA_CM_EVENT_ROUTE_ERROR:
+	case RDMA_CM_EVENT_DEVICE_REMOVAL:
+	case RDMA_CM_EVENT_MULTICAST_JOIN:
+	case RDMA_CM_EVENT_MULTICAST_ERROR:
+	case RDMA_CM_EVENT_REJECTED:
+	case RDMA_CM_EVENT_CONNECT_REQUEST:
+	case RDMA_CM_EVENT_CONNECT_RESPONSE:
+	case RDMA_CM_EVENT_CONNECT_ERROR:
+	case RDMA_CM_EVENT_ADDR_ERROR:
+	case RDMA_CM_EVENT_UNREACHABLE:
+		c->status = Disconnected;
+		rdma_disconnect(rdma->cm_id);
+		break;
+	default:
+		BUG();
+	}
+	complete(&rdma->cm_done);
+	return 0;
+}
+
+static void
+handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma,
+	    struct p9_rdma_context *c, enum ib_wc_status status, u32 byte_len)
+{
+	struct p9_req_t *req;
+	int err = 0;
+	int16_t tag;
+
+	req = NULL;
+	ib_dma_unmap_single(rdma->cm_id->device, c->busa, client->msize,
+							 DMA_FROM_DEVICE);
+
+	if (status != IB_WC_SUCCESS)
+		goto err_out;
+
+	err = p9_parse_header(c->rc, NULL, NULL, &tag, 1);
+	if (err)
+		goto err_out;
+
+	req = p9_tag_lookup(client, tag);
+	if (!req)
+		goto err_out;
+
+	req->rc = c->rc;
+	p9_client_cb(client, req);
+
+	return;
+
+ err_out:
+	P9_DPRINTK(P9_DEBUG_ERROR, "req %p err %d status %d\n",
+		   req, err, status);
+	rdma->state = P9_RDMA_FLUSHING;
+	client->status = Disconnected;
+	return;
+}
+
+static void
+handle_send(struct p9_client *client, struct p9_trans_rdma *rdma,
+	    struct p9_rdma_context *c, enum ib_wc_status status, u32 byte_len)
+{
+	ib_dma_unmap_single(rdma->cm_id->device,
+			    c->busa, c->req->tc->size,
+			    DMA_TO_DEVICE);
+}
+
+static void qp_event_handler(struct ib_event *event, void *context)
+{
+	P9_DPRINTK(P9_DEBUG_ERROR, "QP event %d context %p\n", event->event,
+								context);
+}
+
+static void cq_comp_handler(struct ib_cq *cq, void *cq_context)
+{
+	struct p9_client *client = cq_context;
+	struct p9_trans_rdma *rdma = client->trans;
+	int ret;
+	struct ib_wc wc;
+
+	ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP);
+	while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) {
+		struct p9_rdma_context *c = (void *) (unsigned long) wc.wr_id;
+
+		switch (c->wc_op) {
+		case IB_WC_RECV:
+			atomic_dec(&rdma->rq_count);
+			handle_recv(client, rdma, c, wc.status, wc.byte_len);
+			break;
+
+		case IB_WC_SEND:
+			handle_send(client, rdma, c, wc.status, wc.byte_len);
+			up(&rdma->sq_sem);
+			break;
+
+		default:
+			printk(KERN_ERR "9prdma: unexpected completion type, "
+			       "c->wc_op=%d, wc.opcode=%d, status=%d\n",
+			       c->wc_op, wc.opcode, wc.status);
+			break;
+		}
+		kfree(c);
+	}
+}
+
+static void cq_event_handler(struct ib_event *e, void *v)
+{
+	P9_DPRINTK(P9_DEBUG_ERROR, "CQ event %d context %p\n", e->event, v);
+}
+
+static void rdma_destroy_trans(struct p9_trans_rdma *rdma)
+{
+	if (!rdma)
+		return;
+
+	if (rdma->dma_mr && !IS_ERR(rdma->dma_mr))
+		ib_dereg_mr(rdma->dma_mr);
+
+	if (rdma->qp && !IS_ERR(rdma->qp))
+		ib_destroy_qp(rdma->qp);
+
+	if (rdma->pd && !IS_ERR(rdma->pd))
+		ib_dealloc_pd(rdma->pd);
+
+	if (rdma->cq && !IS_ERR(rdma->cq))
+		ib_destroy_cq(rdma->cq);
+
+	if (rdma->cm_id && !IS_ERR(rdma->cm_id))
+		rdma_destroy_id(rdma->cm_id);
+
+	kfree(rdma);
+}
+
+static int
+post_recv(struct p9_client *client, struct p9_rdma_context *c)
+{
+	struct p9_trans_rdma *rdma = client->trans;
+	struct ib_recv_wr wr, *bad_wr;
+	struct ib_sge sge;
+
+	c->busa = ib_dma_map_single(rdma->cm_id->device,
+				    c->rc->sdata, client->msize,
+				    DMA_FROM_DEVICE);
+	if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
+		goto error;
+
+	sge.addr = c->busa;
+	sge.length = client->msize;
+	sge.lkey = rdma->lkey;
+
+	wr.next = NULL;
+	c->wc_op = IB_WC_RECV;
+	wr.wr_id = (unsigned long) c;
+	wr.sg_list = &sge;
+	wr.num_sge = 1;
+	return ib_post_recv(rdma->qp, &wr, &bad_wr);
+
+ error:
+	P9_DPRINTK(P9_DEBUG_ERROR, "EIO\n");
+	return -EIO;
+}
+
+static int rdma_request(struct p9_client *client, struct p9_req_t *req)
+{
+	struct p9_trans_rdma *rdma = client->trans;
+	struct ib_send_wr wr, *bad_wr;
+	struct ib_sge sge;
+	int err = 0;
+	unsigned long flags;
+	struct p9_rdma_context *c = NULL;
+	struct p9_rdma_context *rpl_context = NULL;
+
+	/* Allocate an fcall for the reply */
+	rpl_context = kmalloc(sizeof *rpl_context, GFP_KERNEL);
+	if (!rpl_context)
+		goto err_close;
+
+	/*
+	 * If the request has a buffer, steal it, otherwise
+	 * allocate a new one.  Typically, requests should already
+	 * have receive buffers allocated and just swap them around
+	 */
+	if (!req->rc) {
+		req->rc = kmalloc(sizeof(struct p9_fcall)+client->msize,
+				  GFP_KERNEL);
+		if (req->rc) {
+			req->rc->sdata = (char *) req->rc +
+						sizeof(struct p9_fcall);
+			req->rc->capacity = client->msize;
+		}
+	}
+	rpl_context->rc = req->rc;
+	if (!rpl_context->rc) {
+		kfree(rpl_context);
+		goto err_close;
+	}
+
+	/*
+	 * Post a receive buffer for this request. We need to ensure
+	 * there is a reply buffer available for every outstanding
+	 * request. A flushed request can result in no reply for an
+	 * outstanding request, so we must keep a count to avoid
+	 * overflowing the RQ.
+	 */
+	if (atomic_inc_return(&rdma->rq_count) <= rdma->rq_depth) {
+		err = post_recv(client, rpl_context);
+		if (err) {
+			kfree(rpl_context->rc);
+			kfree(rpl_context);
+			goto err_close;
+		}
+	} else
+		atomic_dec(&rdma->rq_count);
+
+	/* remove posted receive buffer from request structure */
+	req->rc = NULL;
+
+	/* Post the request */
+	c = kmalloc(sizeof *c, GFP_KERNEL);
+	if (!c)
+		goto err_close;
+	c->req = req;
+
+	c->busa = ib_dma_map_single(rdma->cm_id->device,
+				    c->req->tc->sdata, c->req->tc->size,
+				    DMA_TO_DEVICE);
+	if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
+		goto error;
+
+	sge.addr = c->busa;
+	sge.length = c->req->tc->size;
+	sge.lkey = rdma->lkey;
+
+	wr.next = NULL;
+	c->wc_op = IB_WC_SEND;
+	wr.wr_id = (unsigned long) c;
+	wr.opcode = IB_WR_SEND;
+	wr.send_flags = IB_SEND_SIGNALED;
+	wr.sg_list = &sge;
+	wr.num_sge = 1;
+
+	if (down_interruptible(&rdma->sq_sem))
+		goto error;
+
+	return ib_post_send(rdma->qp, &wr, &bad_wr);
+
+ error:
+	P9_DPRINTK(P9_DEBUG_ERROR, "EIO\n");
+	return -EIO;
+
+ err_close:
+	spin_lock_irqsave(&rdma->req_lock, flags);
+	if (rdma->state < P9_RDMA_CLOSING) {
+		rdma->state = P9_RDMA_CLOSING;
+		spin_unlock_irqrestore(&rdma->req_lock, flags);
+		rdma_disconnect(rdma->cm_id);
+	} else
+		spin_unlock_irqrestore(&rdma->req_lock, flags);
+	return err;
+}
+
+static void rdma_close(struct p9_client *client)
+{
+	struct p9_trans_rdma *rdma;
+
+	if (!client)
+		return;
+
+	rdma = client->trans;
+	if (!rdma)
+		return;
+
+	client->status = Disconnected;
+	rdma_disconnect(rdma->cm_id);
+	rdma_destroy_trans(rdma);
+}
+
+/**
+ * alloc_rdma - Allocate and initialize the rdma transport structure
+ * @msize: MTU
+ * @dotu: Extension attribute
+ * @opts: Mount options structure
+ */
+static struct p9_trans_rdma *alloc_rdma(struct p9_rdma_opts *opts)
+{
+	struct p9_trans_rdma *rdma;
+
+	rdma = kzalloc(sizeof(struct p9_trans_rdma), GFP_KERNEL);
+	if (!rdma)
+		return NULL;
+
+	rdma->sq_depth = opts->sq_depth;
+	rdma->rq_depth = opts->rq_depth;
+	rdma->timeout = opts->timeout;
+	spin_lock_init(&rdma->req_lock);
+	init_completion(&rdma->cm_done);
+	sema_init(&rdma->sq_sem, rdma->sq_depth);
+	atomic_set(&rdma->rq_count, 0);
+
+	return rdma;
+}
+
+/* its not clear to me we can do anything after send has been posted */
+static int rdma_cancel(struct p9_client *client, struct p9_req_t *req)
+{
+	return 1;
+}
+
+/**
+ * trans_create_rdma - Transport method for creating atransport instance
+ * @client: client instance
+ * @addr: IP address string
+ * @args: Mount options string
+ */
+static int
+rdma_create_trans(struct p9_client *client, const char *addr, char *args)
+{
+	int err;
+	struct p9_rdma_opts opts;
+	struct p9_trans_rdma *rdma;
+	struct rdma_conn_param conn_param;
+	struct ib_qp_init_attr qp_attr;
+	struct ib_device_attr devattr;
+
+	/* Parse the transport specific mount options */
+	err = parse_opts(args, &opts);
+	if (err < 0)
+		return err;
+
+	/* Create and initialize the RDMA transport structure */
+	rdma = alloc_rdma(&opts);
+	if (!rdma)
+		return -ENOMEM;
+
+	/* Create the RDMA CM ID */
+	rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP);
+	if (IS_ERR(rdma->cm_id))
+		goto error;
+
+	/* Resolve the server's address */
+	rdma->addr.sin_family = AF_INET;
+	rdma->addr.sin_addr.s_addr = in_aton(addr);
+	rdma->addr.sin_port = htons(opts.port);
+	err = rdma_resolve_addr(rdma->cm_id, NULL,
+				(struct sockaddr *)&rdma->addr,
+				rdma->timeout);
+	if (err)
+		goto error;
+	err = wait_for_completion_interruptible(&rdma->cm_done);
+	if (err || (rdma->state != P9_RDMA_ADDR_RESOLVED))
+		goto error;
+
+	/* Resolve the route to the server */
+	err = rdma_resolve_route(rdma->cm_id, rdma->timeout);
+	if (err)
+		goto error;
+	err = wait_for_completion_interruptible(&rdma->cm_done);
+	if (err || (rdma->state != P9_RDMA_ROUTE_RESOLVED))
+		goto error;
+
+	/* Query the device attributes */
+	err = ib_query_device(rdma->cm_id->device, &devattr);
+	if (err)
+		goto error;
+
+	/* Create the Completion Queue */
+	rdma->cq = ib_create_cq(rdma->cm_id->device, cq_comp_handler,
+				cq_event_handler, client,
+				opts.sq_depth + opts.rq_depth + 1, 0);
+	if (IS_ERR(rdma->cq))
+		goto error;
+	ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP);
+
+	/* Create the Protection Domain */
+	rdma->pd = ib_alloc_pd(rdma->cm_id->device);
+	if (IS_ERR(rdma->pd))
+		goto error;
+
+	/* Cache the DMA lkey in the transport */
+	rdma->dma_mr = NULL;
+	if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
+		rdma->lkey = rdma->cm_id->device->local_dma_lkey;
+	else {
+		rdma->dma_mr = ib_get_dma_mr(rdma->pd, IB_ACCESS_LOCAL_WRITE);
+		if (IS_ERR(rdma->dma_mr))
+			goto error;
+		rdma->lkey = rdma->dma_mr->lkey;
+	}
+
+	/* Create the Queue Pair */
+	memset(&qp_attr, 0, sizeof qp_attr);
+	qp_attr.event_handler = qp_event_handler;
+	qp_attr.qp_context = client;
+	qp_attr.cap.max_send_wr = opts.sq_depth;
+	qp_attr.cap.max_recv_wr = opts.rq_depth;
+	qp_attr.cap.max_send_sge = P9_RDMA_SEND_SGE;
+	qp_attr.cap.max_recv_sge = P9_RDMA_RECV_SGE;
+	qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
+	qp_attr.qp_type = IB_QPT_RC;
+	qp_attr.send_cq = rdma->cq;
+	qp_attr.recv_cq = rdma->cq;
+	err = rdma_create_qp(rdma->cm_id, rdma->pd, &qp_attr);
+	if (err)
+		goto error;
+	rdma->qp = rdma->cm_id->qp;
+
+	/* Request a connection */
+	memset(&conn_param, 0, sizeof(conn_param));
+	conn_param.private_data = NULL;
+	conn_param.private_data_len = 0;
+	conn_param.responder_resources = P9_RDMA_IRD;
+	conn_param.initiator_depth = P9_RDMA_ORD;
+	err = rdma_connect(rdma->cm_id, &conn_param);
+	if (err)
+		goto error;
+	err = wait_for_completion_interruptible(&rdma->cm_done);
+	if (err || (rdma->state != P9_RDMA_CONNECTED))
+		goto error;
+
+	client->trans = rdma;
+	client->status = Connected;
+
+	return 0;
+
+error:
+	rdma_destroy_trans(rdma);
+	return -ENOTCONN;
+}
+
+static struct p9_trans_module p9_rdma_trans = {
+	.name = "rdma",
+	.maxsize = P9_RDMA_MAXSIZE,
+	.def = 0,
+	.owner = THIS_MODULE,
+	.create = rdma_create_trans,
+	.close = rdma_close,
+	.request = rdma_request,
+	.cancel = rdma_cancel,
+};
+
+/**
+ * p9_trans_rdma_init - Register the 9P RDMA transport driver
+ */
+static int __init p9_trans_rdma_init(void)
+{
+	v9fs_register_trans(&p9_rdma_trans);
+	return 0;
+}
+
+static void __exit p9_trans_rdma_exit(void)
+{
+	v9fs_unregister_trans(&p9_rdma_trans);
+}
+
+module_init(p9_trans_rdma_init);
+module_exit(p9_trans_rdma_exit);
+
+MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
+MODULE_DESCRIPTION("RDMA Transport for 9P");
+MODULE_LICENSE("Dual BSD/GPL");
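
With the transport registered, a 9P filesystem can in principle be mounted over RDMA by selecting trans=rdma together with the option tokens parsed by parse_opts() above (port, sq, rq, timeout). A minimal userspace sketch follows; the server address 192.168.1.1 and mount point /mnt/9p are invented for illustration, and the option values shown are simply the driver's defaults.

#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	/* Defaults from trans_rdma.c: port 5640, SQ/RQ depth 32,
	 * CM timeout 30000 msecs. */
	const char *opts = "trans=rdma,port=5640,sq=32,rq=32,timeout=30000";

	if (mount("192.168.1.1", "/mnt/9p", "9p", 0, opts) != 0) {
		perror("mount 9p/rdma");
		return 1;
	}
	return 0;
}
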
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 22ba8632196f..6c023f0f8252 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -179,5 +179,5 @@ void br_dev_setup(struct net_device *dev)
 
 	dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
 			NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | NETIF_F_LLTX |
-			NETIF_F_NETNS_LOCAL;
+			NETIF_F_NETNS_LOCAL | NETIF_F_GSO;
 }
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 573e20f7dba4..0a09ccf68c1c 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -347,15 +347,21 @@ int br_min_mtu(const struct net_bridge *br)
 void br_features_recompute(struct net_bridge *br)
 {
 	struct net_bridge_port *p;
-	unsigned long features;
+	unsigned long features, mask;
 
-	features = br->feature_mask;
+	features = mask = br->feature_mask;
+	if (list_empty(&br->port_list))
+		goto done;
+
+	features &= ~NETIF_F_ONE_FOR_ALL;
 
 	list_for_each_entry(p, &br->port_list, list) {
-		features = netdev_compute_features(features, p->dev->features);
+		features = netdev_increment_features(features,
+						     p->dev->features, mask);
 	}
 
-	br->dev->features = features;
+done:
+	br->dev->features = netdev_fix_features(features, NULL);
 }
 
 /* called with RTNL */
diff --git a/net/core/dev.c b/net/core/dev.c
index b8a4fd0806af..d9038e328cc1 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3947,6 +3947,46 @@ static void netdev_init_queue_locks(struct net_device *dev)
 	__netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
 }
 
+unsigned long netdev_fix_features(unsigned long features, const char *name)
+{
+	/* Fix illegal SG+CSUM combinations. */
+	if ((features & NETIF_F_SG) &&
+	    !(features & NETIF_F_ALL_CSUM)) {
+		if (name)
+			printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no "
+			       "checksum feature.\n", name);
+		features &= ~NETIF_F_SG;
+	}
+
+	/* TSO requires that SG is present as well. */
+	if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) {
+		if (name)
+			printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no "
+			       "SG feature.\n", name);
+		features &= ~NETIF_F_TSO;
+	}
+
+	if (features & NETIF_F_UFO) {
+		if (!(features & NETIF_F_GEN_CSUM)) {
+			if (name)
+				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
+				       "since no NETIF_F_HW_CSUM feature.\n",
+				       name);
+			features &= ~NETIF_F_UFO;
+		}
+
+		if (!(features & NETIF_F_SG)) {
+			if (name)
+				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
+				       "since no NETIF_F_SG feature.\n", name);
+			features &= ~NETIF_F_UFO;
+		}
+	}
+
+	return features;
+}
+EXPORT_SYMBOL(netdev_fix_features);
+
 /**
  *	register_netdevice	- register a network device
  *	@dev: device to register
@@ -4032,36 +4072,7 @@ int register_netdevice(struct net_device *dev)
 		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
 	}
 
-
-	/* Fix illegal SG+CSUM combinations. */
-	if ((dev->features & NETIF_F_SG) &&
-	    !(dev->features & NETIF_F_ALL_CSUM)) {
-		printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n",
-		       dev->name);
-		dev->features &= ~NETIF_F_SG;
-	}
-
-	/* TSO requires that SG is present as well. */
-	if ((dev->features & NETIF_F_TSO) &&
-	    !(dev->features & NETIF_F_SG)) {
-		printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n",
-		       dev->name);
-		dev->features &= ~NETIF_F_TSO;
-	}
-	if (dev->features & NETIF_F_UFO) {
-		if (!(dev->features & NETIF_F_HW_CSUM)) {
-			printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
-					"NETIF_F_HW_CSUM feature.\n",
-					dev->name);
-			dev->features &= ~NETIF_F_UFO;
-		}
-		if (!(dev->features & NETIF_F_SG)) {
-			printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
-					"NETIF_F_SG feature.\n",
-					dev->name);
-			dev->features &= ~NETIF_F_UFO;
-		}
-	}
+	dev->features = netdev_fix_features(dev->features, dev->name);
 
 	/* Enable software GSO if SG is supported. */
 	if (dev->features & NETIF_F_SG)
@@ -4700,49 +4711,45 @@ static int __init netdev_dma_register(void) { return -ENODEV; }
 #endif /* CONFIG_NET_DMA */
 
 /**
- *	netdev_compute_feature - compute conjunction of two feature sets
- *	@all: first feature set
- *	@one: second feature set
+ *	netdev_increment_features - increment feature set by one
+ *	@all: current feature set
+ *	@one: new feature set
+ *	@mask: mask feature set
  *
  *	Computes a new feature set after adding a device with feature set
- *	@one to the master device with current feature set @all.  Returns
- *	the new feature set.
+ *	@one to the master device with current feature set @all.  Will not
+ *	enable anything that is off in @mask. Returns the new feature set.
  */
-int netdev_compute_features(unsigned long all, unsigned long one)
-{
-	/* if device needs checksumming, downgrade to hw checksumming */
-	if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
-		all ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM;
-
-	/* if device can't do all checksum, downgrade to ipv4/ipv6 */
-	if (all & NETIF_F_HW_CSUM && !(one & NETIF_F_HW_CSUM))
-		all ^= NETIF_F_HW_CSUM
-		       | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
-
-	if (one & NETIF_F_GSO)
-		one |= NETIF_F_GSO_SOFTWARE;
-	one |= NETIF_F_GSO;
-
-	/*
-	 * If even one device supports a GSO protocol with software fallback,
-	 * enable it for all.
-	 */
-	all |= one & NETIF_F_GSO_SOFTWARE;
+unsigned long netdev_increment_features(unsigned long all, unsigned long one,
+					unsigned long mask)
+{
+	/* If device needs checksumming, downgrade to it. */
+	if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
+		all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM);
+	else if (mask & NETIF_F_ALL_CSUM) {
+		/* If one device supports v4/v6 checksumming, set for all. */
+		if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) &&
+		    !(all & NETIF_F_GEN_CSUM)) {
+			all &= ~NETIF_F_ALL_CSUM;
+			all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
+		}
 
-	/* If even one device supports robust GSO, enable it for all. */
-	if (one & NETIF_F_GSO_ROBUST)
-		all |= NETIF_F_GSO_ROBUST;
+		/* If one device supports hw checksumming, set for all. */
+		if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) {
+			all &= ~NETIF_F_ALL_CSUM;
+			all |= NETIF_F_HW_CSUM;
+		}
+	}
 
-	all &= one | NETIF_F_LLTX;
+	one |= NETIF_F_ALL_CSUM;
 
-	if (!(all & NETIF_F_ALL_CSUM))
-		all &= ~NETIF_F_SG;
-	if (!(all & NETIF_F_SG))
-		all &= ~NETIF_F_GSO_MASK;
+	one |= all & NETIF_F_ONE_FOR_ALL;
+	all &= one | NETIF_F_LLTX | NETIF_F_GSO;
+	all |= one & mask & NETIF_F_ONE_FOR_ALL;
 
 	return all;
 }
-EXPORT_SYMBOL(netdev_compute_features);
+EXPORT_SYMBOL(netdev_increment_features);
 
 static struct hlist_head *netdev_create_hash(void)
 {
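
netdev_increment_features() above folds one port's feature set into the master's: NETIF_F_ONE_FOR_ALL bits are enabled if any port (and the mask) has them, while the remaining bits must be common to every port. A toy model of that folding, with invented flag values rather than the real NETIF_F_* constants:

#include <stdio.h>

#define F_SG		(1UL << 0)	/* all-for-one: every port must offer it */
#define F_GSO_SW	(1UL << 1)	/* one-for-all: one capable port suffices */
#define F_ONE_FOR_ALL	F_GSO_SW

static unsigned long toy_increment(unsigned long all, unsigned long one,
				   unsigned long mask)
{
	one |= all & F_ONE_FOR_ALL;	   /* keep bits the master already won */
	all &= one;			   /* AND of the all-for-one bits */
	all |= one & mask & F_ONE_FOR_ALL; /* OR of one-for-all bits, masked */
	return all;
}

int main(void)
{
	unsigned long mask = F_SG | F_GSO_SW;
	/* Start as br_features_recompute() does: mask minus one-for-all. */
	unsigned long all = mask & ~F_ONE_FOR_ALL;

	all = toy_increment(all, F_SG, mask);		 /* port 1: SG only */
	all = toy_increment(all, F_SG | F_GSO_SW, mask); /* port 2: SG + GSO */
	printf("bridge features: %#lx\n", all);		 /* SG kept, GSO gained */
	return 0;
}
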
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 990a58493235..e4c5ac9fe89b 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -362,6 +362,17 @@ struct tcp_out_options {
 	__u32 tsval, tsecr;	/* need to include OPTION_TS */
 };
 
+/* Beware: Something in the Internet is very sensitive to the ordering of
+ * TCP options, we learned this through the hard way, so be careful here.
+ * Luckily we can at least blame others for their non-compliance but from
+ * inter-operatibility perspective it seems that we're somewhat stuck with
+ * the ordering which we have been using if we want to keep working with
+ * those broken things (not that it currently hurts anybody as there isn't
+ * particular reason why the ordering would need to be changed).
+ *
+ * At least SACK_PERM as the first option is known to lead to a disaster
+ * (but it may well be that other scenarios fail similarly).
+ */
 static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
 			      const struct tcp_out_options *opts,
 			      __u8 **md5_hash) {
@@ -376,6 +387,12 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
 		*md5_hash = NULL;
 	}
 
+	if (unlikely(opts->mss)) {
+		*ptr++ = htonl((TCPOPT_MSS << 24) |
+			       (TCPOLEN_MSS << 16) |
+			       opts->mss);
+	}
+
 	if (likely(OPTION_TS & opts->options)) {
 		if (unlikely(OPTION_SACK_ADVERTISE & opts->options)) {
 			*ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
@@ -392,12 +409,6 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
 		*ptr++ = htonl(opts->tsecr);
 	}
 
-	if (unlikely(opts->mss)) {
-		*ptr++ = htonl((TCPOPT_MSS << 24) |
-			       (TCPOLEN_MSS << 16) |
-			       opts->mss);
-	}
-
 	if (unlikely(OPTION_SACK_ADVERTISE & opts->options &&
 		     !(OPTION_TS & opts->options))) {
 		*ptr++ = htonl((TCPOPT_NOP << 24) |
@@ -432,7 +443,7 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
 
 		if (tp->rx_opt.dsack) {
 			tp->rx_opt.dsack = 0;
-			tp->rx_opt.eff_sacks--;
+			tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks;
 		}
 	}
 }
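
The MSS block moved above emits a single 32-bit word: kind, length, and the 16-bit MSS value packed in network byte order (TCPOPT_MSS is 2 and TCPOLEN_MSS is 4 per RFC 793). A small standalone sketch decoding that word, with 1460 as an illustrative MSS:

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

int main(void)
{
	uint16_t mss = 1460;
	uint32_t word = htonl((2 << 24) | (4 << 16) | mss);
	const uint8_t *b = (const uint8_t *)&word;

	/* On the wire: kind, length, then the 16-bit MSS. */
	printf("kind=%u len=%u mss=%u\n", b[0], b[1], (b[2] << 8) | b[3]);
	return 0;
}
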
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 8b06fa900482..03e389e8d945 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -545,9 +545,10 @@ static void cbq_ovl_delay(struct cbq_class *cl)
 		expires = ktime_set(0, 0);
 		expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
 		if (hrtimer_try_to_cancel(&q->delay_timer) &&
-		    ktime_to_ns(ktime_sub(q->delay_timer.expires,
-					  expires)) > 0)
-			q->delay_timer.expires = expires;
+		    ktime_to_ns(ktime_sub(
+				hrtimer_get_expires(&q->delay_timer),
+				expires)) > 0)
+			hrtimer_set_expires(&q->delay_timer, expires);
 		hrtimer_restart(&q->delay_timer);
 		cl->delayed = 1;
 		cl->xstats.overactions++;
diff --git a/net/sctp/input.c b/net/sctp/input.c
index a49fa80b57b9..bf612d954d41 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -369,7 +369,7 @@ static void sctp_add_backlog(struct sock *sk, struct sk_buff *skb)
 void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc,
 			   struct sctp_transport *t, __u32 pmtu)
 {
-	if (!t || (t->pathmtu == pmtu))
+	if (!t || (t->pathmtu <= pmtu))
 		return;
 
 	if (sock_owned_by_user(sk)) {
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index d4c3fbc4671e..a6a0ea71ae93 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -2544,6 +2544,7 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep,
 	sctp_shutdownhdr_t *sdh;
 	sctp_disposition_t disposition;
 	struct sctp_ulpevent *ev;
+	__u32 ctsn;
 
 	if (!sctp_vtag_verify(chunk, asoc))
 		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
@@ -2558,6 +2559,14 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep,
 	sdh = (sctp_shutdownhdr_t *)chunk->skb->data;
 	skb_pull(chunk->skb, sizeof(sctp_shutdownhdr_t));
 	chunk->subh.shutdown_hdr = sdh;
+	ctsn = ntohl(sdh->cum_tsn_ack);
+
+	/* If Cumulative TSN Ack beyond the max tsn currently
+	 * send, terminating the association and respond to the
+	 * sender with an ABORT.
+	 */
+	if (!TSN_lt(ctsn, asoc->next_tsn))
+		return sctp_sf_violation_ctsn(ep, asoc, type, arg, commands);
 
 	/* API 5.3.1.5 SCTP_SHUTDOWN_EVENT
 	 * When a peer sends a SHUTDOWN, SCTP delivers this notification to
@@ -2599,6 +2608,51 @@ out:
 	return disposition;
 }
 
+/*
+ * sctp_sf_do_9_2_shut_ctsn
+ *
+ * Once an endpoint has reached the SHUTDOWN-RECEIVED state,
+ * it MUST NOT send a SHUTDOWN in response to a ULP request.
+ * The Cumulative TSN Ack of the received SHUTDOWN chunk
+ * MUST be processed.
+ */
+sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(const struct sctp_endpoint *ep,
+					    const struct sctp_association *asoc,
+					    const sctp_subtype_t type,
+					    void *arg,
+					    sctp_cmd_seq_t *commands)
+{
+	struct sctp_chunk *chunk = arg;
+	sctp_shutdownhdr_t *sdh;
+
+	if (!sctp_vtag_verify(chunk, asoc))
+		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+
+	/* Make sure that the SHUTDOWN chunk has a valid length. */
+	if (!sctp_chunk_length_valid(chunk,
+				     sizeof(struct sctp_shutdown_chunk_t)))
+		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+						  commands);
+
+	sdh = (sctp_shutdownhdr_t *)chunk->skb->data;
+
+	/* If Cumulative TSN Ack beyond the max tsn currently
+	 * send, terminating the association and respond to the
+	 * sender with an ABORT.
+	 */
+	if (!TSN_lt(ntohl(sdh->cum_tsn_ack), asoc->next_tsn))
+		return sctp_sf_violation_ctsn(ep, asoc, type, arg, commands);
+
+	/* verify, by checking the Cumulative TSN Ack field of the
+	 * chunk, that all its outstanding DATA chunks have been
+	 * received by the SHUTDOWN sender.
+	 */
+	sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_CTSN,
+			SCTP_BE32(sdh->cum_tsn_ack));
+
+	return SCTP_DISPOSITION_CONSUME;
+}
+
 /* RFC 2960 9.2
  * If an endpoint is in SHUTDOWN-ACK-SENT state and receives an INIT chunk
  * (e.g., if the SHUTDOWN COMPLETE was lost) with source and destination
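
The new checks reject a SHUTDOWN whose Cumulative TSN Ack is not below asoc->next_tsn, i.e. one that acknowledges data never sent. TSN_lt() is a wrap-safe serial-number comparison; a standalone sketch of the idea (not the kernel's actual macro) follows:

#include <stdio.h>
#include <stdint.h>

static int tsn_lt(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) < 0;	/* survives 32-bit wraparound */
}

int main(void)
{
	uint32_t next_tsn = 100;	/* first TSN not yet sent */

	/* cum_tsn_ack must stay below next_tsn, else the peer is acking
	 * data we never sent and sctp_sf_violation_ctsn() aborts. */
	printf("ctsn=99:  violation=%d\n", !tsn_lt(99, next_tsn));  /* 0 */
	printf("ctsn=100: violation=%d\n", !tsn_lt(100, next_tsn)); /* 1 */
	return 0;
}
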
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index dd4ddc40c0ad..5c8186d88c61 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -266,11 +266,11 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type,
 	/* SCTP_STATE_ESTABLISHED */ \
 	TYPE_SCTP_FUNC(sctp_sf_do_9_2_shutdown), \
 	/* SCTP_STATE_SHUTDOWN_PENDING */ \
-	TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
+	TYPE_SCTP_FUNC(sctp_sf_do_9_2_shutdown), \
 	/* SCTP_STATE_SHUTDOWN_SENT */ \
 	TYPE_SCTP_FUNC(sctp_sf_do_9_2_shutdown_ack), \
 	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
-	TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
+	TYPE_SCTP_FUNC(sctp_sf_do_9_2_shut_ctsn), \
 	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
 	TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
 } /* TYPE_SCTP_SHUTDOWN */
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index c647aab8d418..dc504d308ec0 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -711,28 +711,30 @@ static struct sock *unix_find_other(struct net *net,
 				  int type, unsigned hash, int *error)
 {
 	struct sock *u;
-	struct nameidata nd;
+	struct path path;
 	int err = 0;
 
 	if (sunname->sun_path[0]) {
-		err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd);
+		struct inode *inode;
+		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
 		if (err)
 			goto fail;
-		err = vfs_permission(&nd, MAY_WRITE);
+		inode = path.dentry->d_inode;
+		err = inode_permission(inode, MAY_WRITE);
 		if (err)
 			goto put_fail;
 
 		err = -ECONNREFUSED;
-		if (!S_ISSOCK(nd.path.dentry->d_inode->i_mode))
+		if (!S_ISSOCK(inode->i_mode))
 			goto put_fail;
-		u = unix_find_socket_byinode(net, nd.path.dentry->d_inode);
+		u = unix_find_socket_byinode(net, inode);
 		if (!u)
 			goto put_fail;
 
 		if (u->sk_type == type)
-			touch_atime(nd.path.mnt, nd.path.dentry);
+			touch_atime(path.mnt, path.dentry);
 
-		path_put(&nd.path);
+		path_put(&path);
 
 		err=-EPROTOTYPE;
 		if (u->sk_type != type) {
@@ -753,7 +755,7 @@ static struct sock *unix_find_other(struct net *net,
 	return u;
 
 put_fail:
-	path_put(&nd.path);
+	path_put(&path);
 fail:
 	*error=err;
 	return NULL;
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index 7d82be07fa1d..646c7121dbc0 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -16,7 +16,7 @@ config NL80211
 
 config WIRELESS_OLD_REGULATORY
 	bool "Old wireless static regulatory definitions"
-	default n
+	default y
 	---help---
 	  This option enables the old static regulatory information
 	  and uses it within the new framework. This is available
@@ -40,11 +40,10 @@ config WIRELESS_OLD_REGULATORY
 	  ieee80211_regdom module parameter. This is being phased out and you
 	  should stop using them ASAP.
 
-	  Say N unless you cannot install a new userspace application
-	  or have one currently depending on the ieee80211_regdom module
-	  parameter and cannot port it to use the new userspace interfaces.
-
-	  This is scheduled for removal for 2.6.29.
+	  Say Y unless you have installed a new userspace application.
+	  Also say Y if have one currently depending on the ieee80211_regdom
+	  module parameter and cannot port it to use the new userspace
+	  interfaces.
 
 config WIRELESS_EXT
 	bool "Wireless extensions"