aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorPatrick McHardy <kaber@trash.net>2011-04-13 07:32:28 -0400
committerPatrick McHardy <kaber@trash.net>2011-04-13 07:32:28 -0400
commitb32e3dc7860d00124fa432dba09667e647cb9bcc (patch)
tree2fa6e56f389431dfb84609d3d7572cad76e88e71 /net
parent6604271c5bc658a6067ed0c3deba4d89e0e50382 (diff)
parent96120d86fe302c006259baee9061eea9e1b9e486 (diff)
Merge branch 'master' of ssh://master.kernel.org/pub/scm/linux/kernel/git/kaber/nf-2.6
Diffstat (limited to 'net')
-rw-r--r--net/9p/client.c10
-rw-r--r--net/9p/protocol.c6
-rw-r--r--net/9p/trans_common.c10
-rw-r--r--net/9p/trans_fd.c2
-rw-r--r--net/9p/trans_rdma.c6
-rw-r--r--net/9p/trans_virtio.c72
-rw-r--r--net/9p/util.c2
-rw-r--r--net/appletalk/ddp.c8
-rw-r--r--net/atm/common.c1
-rw-r--r--net/bluetooth/hci_core.c10
-rw-r--r--net/bluetooth/hci_event.c4
-rw-r--r--net/bluetooth/l2cap_core.c4
-rw-r--r--net/bluetooth/l2cap_sock.c5
-rw-r--r--net/bluetooth/mgmt.c2
-rw-r--r--net/bridge/br_if.c6
-rw-r--r--net/bridge/br_multicast.c4
-rw-r--r--net/bridge/br_private.h2
-rw-r--r--net/bridge/br_stp_if.c11
-rw-r--r--net/can/af_can.c9
-rw-r--r--net/can/bcm.c4
-rw-r--r--net/can/raw.c4
-rw-r--r--net/ceph/armor.c4
-rw-r--r--net/ceph/ceph_common.c1
-rw-r--r--net/ceph/osd_client.c624
-rw-r--r--net/core/dev.c98
-rw-r--r--net/core/drop_monitor.c2
-rw-r--r--net/core/ethtool.c19
-rw-r--r--net/core/pktgen.c5
-rw-r--r--net/ipv4/arp.c3
-rw-r--r--net/ipv4/devinet.c30
-rw-r--r--net/ipv4/fib_frontend.c114
-rw-r--r--net/ipv4/fib_lookup.h3
-rw-r--r--net/ipv4/fib_semantics.c47
-rw-r--r--net/ipv4/fib_trie.c18
-rw-r--r--net/ipv4/ip_options.c6
-rw-r--r--net/ipv4/netfilter.c5
-rw-r--r--net/ipv4/raw.c1
-rw-r--r--net/ipv4/route.c8
-rw-r--r--net/ipv4/tcp_input.c22
-rw-r--r--net/ipv6/ip6mr.c2
-rw-r--r--net/ipv6/ndisc.c2
-rw-r--r--net/ipv6/netfilter.c13
-rw-r--r--net/ipv6/route.c2
-rw-r--r--net/ipv6/sysctl_net_ipv6.c18
-rw-r--r--net/ipx/af_ipx.c2
-rw-r--r--net/irda/iriap.c6
-rw-r--r--net/irda/irnet/irnet_ppp.c3
-rw-r--r--net/l2tp/l2tp_eth.c2
-rw-r--r--net/mac80211/key.c7
-rw-r--r--net/mac80211/rc80211_minstrel_ht.c25
-rw-r--r--net/mac80211/rx.c3
-rw-r--r--net/mac80211/sta_info.c1
-rw-r--r--net/netfilter/Kconfig1
-rw-r--r--net/netfilter/ipvs/ip_vs_app.c23
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c4
-rw-r--r--net/netfilter/nf_conntrack_h323_asn1.c2
-rw-r--r--net/netfilter/nf_conntrack_h323_main.c16
-rw-r--r--net/netfilter/xt_TCPMSS.c2
-rw-r--r--net/netfilter/xt_addrtype.c42
-rw-r--r--net/netfilter/xt_conntrack.c2
-rw-r--r--net/rds/cong.c9
-rw-r--r--net/rose/af_rose.c8
-rw-r--r--net/rose/rose_loopback.c13
-rw-r--r--net/rose/rose_route.c20
-rw-r--r--net/rose/rose_subr.c101
-rw-r--r--net/sctp/protocol.c4
-rw-r--r--net/sunrpc/auth_gss/gss_mech_switch.c38
-rw-r--r--net/sunrpc/sched.c4
-rw-r--r--net/sunrpc/svcauth_unix.c18
-rw-r--r--net/sunrpc/xprtsock.c2
-rw-r--r--net/wireless/scan.c31
-rw-r--r--net/xfrm/xfrm_input.c4
-rw-r--r--net/xfrm/xfrm_output.c4
-rw-r--r--net/xfrm/xfrm_replay.c17
-rw-r--r--net/xfrm/xfrm_state.c21
-rw-r--r--net/xfrm/xfrm_user.c26
76 files changed, 1195 insertions, 495 deletions
diff --git a/net/9p/client.c b/net/9p/client.c
index 347ec0cd2718..2ccbf04d37df 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -223,7 +223,7 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag)
223 223
224 req = &c->reqs[row][col]; 224 req = &c->reqs[row][col];
225 if (!req->tc) { 225 if (!req->tc) {
226 req->wq = kmalloc(sizeof(wait_queue_head_t), GFP_KERNEL); 226 req->wq = kmalloc(sizeof(wait_queue_head_t), GFP_NOFS);
227 if (!req->wq) { 227 if (!req->wq) {
228 printk(KERN_ERR "Couldn't grow tag array\n"); 228 printk(KERN_ERR "Couldn't grow tag array\n");
229 return ERR_PTR(-ENOMEM); 229 return ERR_PTR(-ENOMEM);
@@ -233,17 +233,17 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag)
233 P9_TRANS_PREF_PAYLOAD_SEP) { 233 P9_TRANS_PREF_PAYLOAD_SEP) {
234 int alloc_msize = min(c->msize, 4096); 234 int alloc_msize = min(c->msize, 4096);
235 req->tc = kmalloc(sizeof(struct p9_fcall)+alloc_msize, 235 req->tc = kmalloc(sizeof(struct p9_fcall)+alloc_msize,
236 GFP_KERNEL); 236 GFP_NOFS);
237 req->tc->capacity = alloc_msize; 237 req->tc->capacity = alloc_msize;
238 req->rc = kmalloc(sizeof(struct p9_fcall)+alloc_msize, 238 req->rc = kmalloc(sizeof(struct p9_fcall)+alloc_msize,
239 GFP_KERNEL); 239 GFP_NOFS);
240 req->rc->capacity = alloc_msize; 240 req->rc->capacity = alloc_msize;
241 } else { 241 } else {
242 req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize, 242 req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize,
243 GFP_KERNEL); 243 GFP_NOFS);
244 req->tc->capacity = c->msize; 244 req->tc->capacity = c->msize;
245 req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize, 245 req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize,
246 GFP_KERNEL); 246 GFP_NOFS);
247 req->rc->capacity = c->msize; 247 req->rc->capacity = c->msize;
248 } 248 }
249 if ((!req->tc) || (!req->rc)) { 249 if ((!req->tc) || (!req->rc)) {
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 2ce515b859b3..8a4084fa8b5a 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -205,7 +205,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
205 if (errcode) 205 if (errcode)
206 break; 206 break;
207 207
208 *sptr = kmalloc(len + 1, GFP_KERNEL); 208 *sptr = kmalloc(len + 1, GFP_NOFS);
209 if (*sptr == NULL) { 209 if (*sptr == NULL) {
210 errcode = -EFAULT; 210 errcode = -EFAULT;
211 break; 211 break;
@@ -273,7 +273,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
273 if (!errcode) { 273 if (!errcode) {
274 *wnames = 274 *wnames =
275 kmalloc(sizeof(char *) * *nwname, 275 kmalloc(sizeof(char *) * *nwname,
276 GFP_KERNEL); 276 GFP_NOFS);
277 if (!*wnames) 277 if (!*wnames)
278 errcode = -ENOMEM; 278 errcode = -ENOMEM;
279 } 279 }
@@ -317,7 +317,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
317 *wqids = 317 *wqids =
318 kmalloc(*nwqid * 318 kmalloc(*nwqid *
319 sizeof(struct p9_qid), 319 sizeof(struct p9_qid),
320 GFP_KERNEL); 320 GFP_NOFS);
321 if (*wqids == NULL) 321 if (*wqids == NULL)
322 errcode = -ENOMEM; 322 errcode = -ENOMEM;
323 } 323 }
diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c
index d62b9aa58df8..9172ab78fcb0 100644
--- a/net/9p/trans_common.c
+++ b/net/9p/trans_common.c
@@ -41,9 +41,9 @@ EXPORT_SYMBOL(p9_release_req_pages);
41int 41int
42p9_nr_pages(struct p9_req_t *req) 42p9_nr_pages(struct p9_req_t *req)
43{ 43{
44 int start_page, end_page; 44 unsigned long start_page, end_page;
45 start_page = (unsigned long long)req->tc->pubuf >> PAGE_SHIFT; 45 start_page = (unsigned long)req->tc->pubuf >> PAGE_SHIFT;
46 end_page = ((unsigned long long)req->tc->pubuf + req->tc->pbuf_size + 46 end_page = ((unsigned long)req->tc->pubuf + req->tc->pbuf_size +
47 PAGE_SIZE - 1) >> PAGE_SHIFT; 47 PAGE_SIZE - 1) >> PAGE_SHIFT;
48 return end_page - start_page; 48 return end_page - start_page;
49} 49}
@@ -69,8 +69,8 @@ p9_payload_gup(struct p9_req_t *req, size_t *pdata_off, int *pdata_len,
69 *pdata_off = (size_t)req->tc->pubuf & (PAGE_SIZE-1); 69 *pdata_off = (size_t)req->tc->pubuf & (PAGE_SIZE-1);
70 70
71 if (*pdata_off) 71 if (*pdata_off)
72 first_page_bytes = min((PAGE_SIZE - *pdata_off), 72 first_page_bytes = min(((size_t)PAGE_SIZE - *pdata_off),
73 req->tc->pbuf_size); 73 req->tc->pbuf_size);
74 74
75 rpinfo = req->tc->private; 75 rpinfo = req->tc->private;
76 pdata_mapped_pages = get_user_pages_fast((unsigned long)req->tc->pubuf, 76 pdata_mapped_pages = get_user_pages_fast((unsigned long)req->tc->pubuf,
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index a30471e51740..aa5672b15eae 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -350,7 +350,7 @@ static void p9_read_work(struct work_struct *work)
350 350
351 if (m->req->rc == NULL) { 351 if (m->req->rc == NULL) {
352 m->req->rc = kmalloc(sizeof(struct p9_fcall) + 352 m->req->rc = kmalloc(sizeof(struct p9_fcall) +
353 m->client->msize, GFP_KERNEL); 353 m->client->msize, GFP_NOFS);
354 if (!m->req->rc) { 354 if (!m->req->rc) {
355 m->req = NULL; 355 m->req = NULL;
356 err = -ENOMEM; 356 err = -ENOMEM;
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 29a54ccd213d..150e0c4bbf40 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -424,7 +424,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
424 struct p9_rdma_context *rpl_context = NULL; 424 struct p9_rdma_context *rpl_context = NULL;
425 425
426 /* Allocate an fcall for the reply */ 426 /* Allocate an fcall for the reply */
427 rpl_context = kmalloc(sizeof *rpl_context, GFP_KERNEL); 427 rpl_context = kmalloc(sizeof *rpl_context, GFP_NOFS);
428 if (!rpl_context) { 428 if (!rpl_context) {
429 err = -ENOMEM; 429 err = -ENOMEM;
430 goto err_close; 430 goto err_close;
@@ -437,7 +437,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
437 */ 437 */
438 if (!req->rc) { 438 if (!req->rc) {
439 req->rc = kmalloc(sizeof(struct p9_fcall)+client->msize, 439 req->rc = kmalloc(sizeof(struct p9_fcall)+client->msize,
440 GFP_KERNEL); 440 GFP_NOFS);
441 if (req->rc) { 441 if (req->rc) {
442 req->rc->sdata = (char *) req->rc + 442 req->rc->sdata = (char *) req->rc +
443 sizeof(struct p9_fcall); 443 sizeof(struct p9_fcall);
@@ -468,7 +468,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
468 req->rc = NULL; 468 req->rc = NULL;
469 469
470 /* Post the request */ 470 /* Post the request */
471 c = kmalloc(sizeof *c, GFP_KERNEL); 471 c = kmalloc(sizeof *c, GFP_NOFS);
472 if (!c) { 472 if (!c) {
473 err = -ENOMEM; 473 err = -ENOMEM;
474 goto err_free1; 474 goto err_free1;
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 9b550ed9c711..e8f046b07182 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -43,6 +43,7 @@
43#include <net/9p/client.h> 43#include <net/9p/client.h>
44#include <net/9p/transport.h> 44#include <net/9p/transport.h>
45#include <linux/scatterlist.h> 45#include <linux/scatterlist.h>
46#include <linux/swap.h>
46#include <linux/virtio.h> 47#include <linux/virtio.h>
47#include <linux/virtio_9p.h> 48#include <linux/virtio_9p.h>
48#include "trans_common.h" 49#include "trans_common.h"
@@ -51,6 +52,8 @@
51 52
52/* a single mutex to manage channel initialization and attachment */ 53/* a single mutex to manage channel initialization and attachment */
53static DEFINE_MUTEX(virtio_9p_lock); 54static DEFINE_MUTEX(virtio_9p_lock);
55static DECLARE_WAIT_QUEUE_HEAD(vp_wq);
56static atomic_t vp_pinned = ATOMIC_INIT(0);
54 57
55/** 58/**
56 * struct virtio_chan - per-instance transport information 59 * struct virtio_chan - per-instance transport information
@@ -78,7 +81,10 @@ struct virtio_chan {
78 struct virtqueue *vq; 81 struct virtqueue *vq;
79 int ring_bufs_avail; 82 int ring_bufs_avail;
80 wait_queue_head_t *vc_wq; 83 wait_queue_head_t *vc_wq;
81 84 /* This is global limit. Since we don't have a global structure,
85 * will be placing it in each channel.
86 */
87 int p9_max_pages;
82 /* Scatterlist: can be too big for stack. */ 88 /* Scatterlist: can be too big for stack. */
83 struct scatterlist sg[VIRTQUEUE_NUM]; 89 struct scatterlist sg[VIRTQUEUE_NUM];
84 90
@@ -141,34 +147,36 @@ static void req_done(struct virtqueue *vq)
141 147
142 P9_DPRINTK(P9_DEBUG_TRANS, ": request done\n"); 148 P9_DPRINTK(P9_DEBUG_TRANS, ": request done\n");
143 149
144 do { 150 while (1) {
145 spin_lock_irqsave(&chan->lock, flags); 151 spin_lock_irqsave(&chan->lock, flags);
146 rc = virtqueue_get_buf(chan->vq, &len); 152 rc = virtqueue_get_buf(chan->vq, &len);
147 153
148 if (rc != NULL) { 154 if (rc == NULL) {
149 if (!chan->ring_bufs_avail) {
150 chan->ring_bufs_avail = 1;
151 wake_up(chan->vc_wq);
152 }
153 spin_unlock_irqrestore(&chan->lock, flags);
154 P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc);
155 P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n",
156 rc->tag);
157 req = p9_tag_lookup(chan->client, rc->tag);
158 req->status = REQ_STATUS_RCVD;
159 if (req->tc->private) {
160 struct trans_rpage_info *rp = req->tc->private;
161 /*Release pages */
162 p9_release_req_pages(rp);
163 if (rp->rp_alloc)
164 kfree(rp);
165 req->tc->private = NULL;
166 }
167 p9_client_cb(chan->client, req);
168 } else {
169 spin_unlock_irqrestore(&chan->lock, flags); 155 spin_unlock_irqrestore(&chan->lock, flags);
156 break;
157 }
158
159 chan->ring_bufs_avail = 1;
160 spin_unlock_irqrestore(&chan->lock, flags);
161 /* Wakeup if anyone waiting for VirtIO ring space. */
162 wake_up(chan->vc_wq);
163 P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc);
164 P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag);
165 req = p9_tag_lookup(chan->client, rc->tag);
166 if (req->tc->private) {
167 struct trans_rpage_info *rp = req->tc->private;
168 int p = rp->rp_nr_pages;
169 /*Release pages */
170 p9_release_req_pages(rp);
171 atomic_sub(p, &vp_pinned);
172 wake_up(&vp_wq);
173 if (rp->rp_alloc)
174 kfree(rp);
175 req->tc->private = NULL;
170 } 176 }
171 } while (rc != NULL); 177 req->status = REQ_STATUS_RCVD;
178 p9_client_cb(chan->client, req);
179 }
172} 180}
173 181
174/** 182/**
@@ -263,7 +271,6 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
263 271
264 P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n"); 272 P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n");
265 273
266req_retry:
267 req->status = REQ_STATUS_SENT; 274 req->status = REQ_STATUS_SENT;
268 275
269 if (req->tc->pbuf_size && (req->tc->pubuf && P9_IS_USER_CONTEXT)) { 276 if (req->tc->pbuf_size && (req->tc->pubuf && P9_IS_USER_CONTEXT)) {
@@ -271,6 +278,14 @@ req_retry:
271 int rpinfo_size = sizeof(struct trans_rpage_info) + 278 int rpinfo_size = sizeof(struct trans_rpage_info) +
272 sizeof(struct page *) * nr_pages; 279 sizeof(struct page *) * nr_pages;
273 280
281 if (atomic_read(&vp_pinned) >= chan->p9_max_pages) {
282 err = wait_event_interruptible(vp_wq,
283 atomic_read(&vp_pinned) < chan->p9_max_pages);
284 if (err == -ERESTARTSYS)
285 return err;
286 P9_DPRINTK(P9_DEBUG_TRANS, "9p: May gup pages now.\n");
287 }
288
274 if (rpinfo_size <= (req->tc->capacity - req->tc->size)) { 289 if (rpinfo_size <= (req->tc->capacity - req->tc->size)) {
275 /* We can use sdata */ 290 /* We can use sdata */
276 req->tc->private = req->tc->sdata + req->tc->size; 291 req->tc->private = req->tc->sdata + req->tc->size;
@@ -293,9 +308,12 @@ req_retry:
293 if (rpinfo->rp_alloc) 308 if (rpinfo->rp_alloc)
294 kfree(rpinfo); 309 kfree(rpinfo);
295 return err; 310 return err;
311 } else {
312 atomic_add(rpinfo->rp_nr_pages, &vp_pinned);
296 } 313 }
297 } 314 }
298 315
316req_retry_pinned:
299 spin_lock_irqsave(&chan->lock, flags); 317 spin_lock_irqsave(&chan->lock, flags);
300 318
301 /* Handle out VirtIO ring buffers */ 319 /* Handle out VirtIO ring buffers */
@@ -356,7 +374,7 @@ req_retry:
356 return err; 374 return err;
357 375
358 P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n"); 376 P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n");
359 goto req_retry; 377 goto req_retry_pinned;
360 } else { 378 } else {
361 spin_unlock_irqrestore(&chan->lock, flags); 379 spin_unlock_irqrestore(&chan->lock, flags);
362 P9_DPRINTK(P9_DEBUG_TRANS, 380 P9_DPRINTK(P9_DEBUG_TRANS,
@@ -453,6 +471,8 @@ static int p9_virtio_probe(struct virtio_device *vdev)
453 } 471 }
454 init_waitqueue_head(chan->vc_wq); 472 init_waitqueue_head(chan->vc_wq);
455 chan->ring_bufs_avail = 1; 473 chan->ring_bufs_avail = 1;
474 /* Ceiling limit to avoid denial of service attacks */
475 chan->p9_max_pages = nr_free_buffer_pages()/4;
456 476
457 mutex_lock(&virtio_9p_lock); 477 mutex_lock(&virtio_9p_lock);
458 list_add_tail(&chan->chan_list, &virtio_chan_list); 478 list_add_tail(&chan->chan_list, &virtio_chan_list);
diff --git a/net/9p/util.c b/net/9p/util.c
index e048701a72d2..b84619b5ba22 100644
--- a/net/9p/util.c
+++ b/net/9p/util.c
@@ -92,7 +92,7 @@ int p9_idpool_get(struct p9_idpool *p)
92 unsigned long flags; 92 unsigned long flags;
93 93
94retry: 94retry:
95 if (idr_pre_get(&p->pool, GFP_KERNEL) == 0) 95 if (idr_pre_get(&p->pool, GFP_NOFS) == 0)
96 return 0; 96 return 0;
97 97
98 spin_lock_irqsave(&p->lock, flags); 98 spin_lock_irqsave(&p->lock, flags);
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 3d4f4b043406..956a5302002a 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1051,13 +1051,17 @@ static int atalk_release(struct socket *sock)
1051{ 1051{
1052 struct sock *sk = sock->sk; 1052 struct sock *sk = sock->sk;
1053 1053
1054 lock_sock(sk);
1055 if (sk) { 1054 if (sk) {
1055 sock_hold(sk);
1056 lock_sock(sk);
1057
1056 sock_orphan(sk); 1058 sock_orphan(sk);
1057 sock->sk = NULL; 1059 sock->sk = NULL;
1058 atalk_destroy_socket(sk); 1060 atalk_destroy_socket(sk);
1061
1062 release_sock(sk);
1063 sock_put(sk);
1059 } 1064 }
1060 release_sock(sk);
1061 return 0; 1065 return 0;
1062} 1066}
1063 1067
diff --git a/net/atm/common.c b/net/atm/common.c
index 1b9c52a02cd3..22b963d06a10 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -252,6 +252,7 @@ void atm_dev_release_vccs(struct atm_dev *dev)
252 } 252 }
253 write_unlock_irq(&vcc_sklist_lock); 253 write_unlock_irq(&vcc_sklist_lock);
254} 254}
255EXPORT_SYMBOL(atm_dev_release_vccs);
255 256
256static int adjust_tp(struct atm_trafprm *tp, unsigned char aal) 257static int adjust_tp(struct atm_trafprm *tp, unsigned char aal)
257{ 258{
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index b372fb8bcdcf..2216620ff296 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -186,6 +186,7 @@ static void hci_reset_req(struct hci_dev *hdev, unsigned long opt)
186 BT_DBG("%s %ld", hdev->name, opt); 186 BT_DBG("%s %ld", hdev->name, opt);
187 187
188 /* Reset device */ 188 /* Reset device */
189 set_bit(HCI_RESET, &hdev->flags);
189 hci_send_cmd(hdev, HCI_OP_RESET, 0, NULL); 190 hci_send_cmd(hdev, HCI_OP_RESET, 0, NULL);
190} 191}
191 192
@@ -213,8 +214,10 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt)
213 /* Mandatory initialization */ 214 /* Mandatory initialization */
214 215
215 /* Reset */ 216 /* Reset */
216 if (!test_bit(HCI_QUIRK_NO_RESET, &hdev->quirks)) 217 if (!test_bit(HCI_QUIRK_NO_RESET, &hdev->quirks)) {
218 set_bit(HCI_RESET, &hdev->flags);
217 hci_send_cmd(hdev, HCI_OP_RESET, 0, NULL); 219 hci_send_cmd(hdev, HCI_OP_RESET, 0, NULL);
220 }
218 221
219 /* Read Local Supported Features */ 222 /* Read Local Supported Features */
220 hci_send_cmd(hdev, HCI_OP_READ_LOCAL_FEATURES, 0, NULL); 223 hci_send_cmd(hdev, HCI_OP_READ_LOCAL_FEATURES, 0, NULL);
@@ -584,6 +587,9 @@ static int hci_dev_do_close(struct hci_dev *hdev)
584 hci_req_cancel(hdev, ENODEV); 587 hci_req_cancel(hdev, ENODEV);
585 hci_req_lock(hdev); 588 hci_req_lock(hdev);
586 589
590 /* Stop timer, it might be running */
591 del_timer_sync(&hdev->cmd_timer);
592
587 if (!test_and_clear_bit(HCI_UP, &hdev->flags)) { 593 if (!test_and_clear_bit(HCI_UP, &hdev->flags)) {
588 hci_req_unlock(hdev); 594 hci_req_unlock(hdev);
589 return 0; 595 return 0;
@@ -623,7 +629,6 @@ static int hci_dev_do_close(struct hci_dev *hdev)
623 629
624 /* Drop last sent command */ 630 /* Drop last sent command */
625 if (hdev->sent_cmd) { 631 if (hdev->sent_cmd) {
626 del_timer_sync(&hdev->cmd_timer);
627 kfree_skb(hdev->sent_cmd); 632 kfree_skb(hdev->sent_cmd);
628 hdev->sent_cmd = NULL; 633 hdev->sent_cmd = NULL;
629 } 634 }
@@ -1074,6 +1079,7 @@ static void hci_cmd_timer(unsigned long arg)
1074 1079
1075 BT_ERR("%s command tx timeout", hdev->name); 1080 BT_ERR("%s command tx timeout", hdev->name);
1076 atomic_set(&hdev->cmd_cnt, 1); 1081 atomic_set(&hdev->cmd_cnt, 1);
1082 clear_bit(HCI_RESET, &hdev->flags);
1077 tasklet_schedule(&hdev->cmd_task); 1083 tasklet_schedule(&hdev->cmd_task);
1078} 1084}
1079 1085
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 3fbfa50c2bff..cebe7588469f 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -183,6 +183,8 @@ static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb)
183 183
184 BT_DBG("%s status 0x%x", hdev->name, status); 184 BT_DBG("%s status 0x%x", hdev->name, status);
185 185
186 clear_bit(HCI_RESET, &hdev->flags);
187
186 hci_req_complete(hdev, HCI_OP_RESET, status); 188 hci_req_complete(hdev, HCI_OP_RESET, status);
187} 189}
188 190
@@ -1847,7 +1849,7 @@ static inline void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb)
1847 if (ev->opcode != HCI_OP_NOP) 1849 if (ev->opcode != HCI_OP_NOP)
1848 del_timer(&hdev->cmd_timer); 1850 del_timer(&hdev->cmd_timer);
1849 1851
1850 if (ev->ncmd) { 1852 if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags)) {
1851 atomic_set(&hdev->cmd_cnt, 1); 1853 atomic_set(&hdev->cmd_cnt, 1);
1852 if (!skb_queue_empty(&hdev->cmd_q)) 1854 if (!skb_queue_empty(&hdev->cmd_q))
1853 tasklet_schedule(&hdev->cmd_task); 1855 tasklet_schedule(&hdev->cmd_task);
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index c9f9cecca527..ca27f3a41536 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -1116,7 +1116,9 @@ int l2cap_ertm_send(struct sock *sk)
1116 bt_cb(skb)->tx_seq = pi->next_tx_seq; 1116 bt_cb(skb)->tx_seq = pi->next_tx_seq;
1117 pi->next_tx_seq = (pi->next_tx_seq + 1) % 64; 1117 pi->next_tx_seq = (pi->next_tx_seq + 1) % 64;
1118 1118
1119 pi->unacked_frames++; 1119 if (bt_cb(skb)->retries == 1)
1120 pi->unacked_frames++;
1121
1120 pi->frames_sent++; 1122 pi->frames_sent++;
1121 1123
1122 if (skb_queue_is_last(TX_QUEUE(sk), skb)) 1124 if (skb_queue_is_last(TX_QUEUE(sk), skb))
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index fc85e7ae33c7..f77308e63e58 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -923,8 +923,9 @@ void __l2cap_sock_close(struct sock *sk, int reason)
923 rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); 923 rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO);
924 l2cap_send_cmd(conn, l2cap_pi(sk)->ident, 924 l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
925 L2CAP_CONN_RSP, sizeof(rsp), &rsp); 925 L2CAP_CONN_RSP, sizeof(rsp), &rsp);
926 } else 926 }
927 l2cap_chan_del(sk, reason); 927
928 l2cap_chan_del(sk, reason);
928 break; 929 break;
929 930
930 case BT_CONNECT: 931 case BT_CONNECT:
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 0054c74e27b7..4476d8e3c0f2 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -1230,6 +1230,8 @@ static int user_confirm_reply(struct sock *sk, u16 index, unsigned char *data,
1230 if (!hdev) 1230 if (!hdev)
1231 return cmd_status(sk, index, mgmt_op, ENODEV); 1231 return cmd_status(sk, index, mgmt_op, ENODEV);
1232 1232
1233 hci_dev_lock_bh(hdev);
1234
1233 if (!test_bit(HCI_UP, &hdev->flags)) { 1235 if (!test_bit(HCI_UP, &hdev->flags)) {
1234 err = cmd_status(sk, index, mgmt_op, ENETDOWN); 1236 err = cmd_status(sk, index, mgmt_op, ENETDOWN);
1235 goto failed; 1237 goto failed;
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index dce8f0009a12..718b60366dfe 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -389,6 +389,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
389{ 389{
390 struct net_bridge_port *p; 390 struct net_bridge_port *p;
391 int err = 0; 391 int err = 0;
392 bool changed_addr;
392 393
393 /* Don't allow bridging non-ethernet like devices */ 394 /* Don't allow bridging non-ethernet like devices */
394 if ((dev->flags & IFF_LOOPBACK) || 395 if ((dev->flags & IFF_LOOPBACK) ||
@@ -446,7 +447,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
446 list_add_rcu(&p->list, &br->port_list); 447 list_add_rcu(&p->list, &br->port_list);
447 448
448 spin_lock_bh(&br->lock); 449 spin_lock_bh(&br->lock);
449 br_stp_recalculate_bridge_id(br); 450 changed_addr = br_stp_recalculate_bridge_id(br);
450 br_features_recompute(br); 451 br_features_recompute(br);
451 452
452 if ((dev->flags & IFF_UP) && netif_carrier_ok(dev) && 453 if ((dev->flags & IFF_UP) && netif_carrier_ok(dev) &&
@@ -456,6 +457,9 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
456 457
457 br_ifinfo_notify(RTM_NEWLINK, p); 458 br_ifinfo_notify(RTM_NEWLINK, p);
458 459
460 if (changed_addr)
461 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
462
459 dev_set_mtu(br->dev, br_min_mtu(br)); 463 dev_set_mtu(br->dev, br_min_mtu(br));
460 464
461 kobject_uevent(&p->kobj, KOBJ_ADD); 465 kobject_uevent(&p->kobj, KOBJ_ADD);
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 030a002ff8ee..59660c909a7c 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -445,9 +445,9 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
445 ip6h->payload_len = htons(8 + sizeof(*mldq)); 445 ip6h->payload_len = htons(8 + sizeof(*mldq));
446 ip6h->nexthdr = IPPROTO_HOPOPTS; 446 ip6h->nexthdr = IPPROTO_HOPOPTS;
447 ip6h->hop_limit = 1; 447 ip6h->hop_limit = 1;
448 ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1));
448 ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0, 449 ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0,
449 &ip6h->saddr); 450 &ip6h->saddr);
450 ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1));
451 ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest); 451 ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest);
452 452
453 hopopt = (u8 *)(ip6h + 1); 453 hopopt = (u8 *)(ip6h + 1);
@@ -1475,7 +1475,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
1475 ip6h->payload_len == 0) 1475 ip6h->payload_len == 0)
1476 return 0; 1476 return 0;
1477 1477
1478 len = ntohs(ip6h->payload_len); 1478 len = ntohs(ip6h->payload_len) + sizeof(*ip6h);
1479 if (skb->len < len) 1479 if (skb->len < len)
1480 return -EINVAL; 1480 return -EINVAL;
1481 1481
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 19e2f46ed086..387013d33745 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -497,7 +497,7 @@ extern void br_stp_disable_bridge(struct net_bridge *br);
497extern void br_stp_set_enabled(struct net_bridge *br, unsigned long val); 497extern void br_stp_set_enabled(struct net_bridge *br, unsigned long val);
498extern void br_stp_enable_port(struct net_bridge_port *p); 498extern void br_stp_enable_port(struct net_bridge_port *p);
499extern void br_stp_disable_port(struct net_bridge_port *p); 499extern void br_stp_disable_port(struct net_bridge_port *p);
500extern void br_stp_recalculate_bridge_id(struct net_bridge *br); 500extern bool br_stp_recalculate_bridge_id(struct net_bridge *br);
501extern void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *a); 501extern void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *a);
502extern void br_stp_set_bridge_priority(struct net_bridge *br, 502extern void br_stp_set_bridge_priority(struct net_bridge *br,
503 u16 newprio); 503 u16 newprio);
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 79372d4a4055..9b61d09de9b9 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -204,7 +204,7 @@ void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *addr)
204static const unsigned short br_mac_zero_aligned[ETH_ALEN >> 1]; 204static const unsigned short br_mac_zero_aligned[ETH_ALEN >> 1];
205 205
206/* called under bridge lock */ 206/* called under bridge lock */
207void br_stp_recalculate_bridge_id(struct net_bridge *br) 207bool br_stp_recalculate_bridge_id(struct net_bridge *br)
208{ 208{
209 const unsigned char *br_mac_zero = 209 const unsigned char *br_mac_zero =
210 (const unsigned char *)br_mac_zero_aligned; 210 (const unsigned char *)br_mac_zero_aligned;
@@ -213,7 +213,7 @@ void br_stp_recalculate_bridge_id(struct net_bridge *br)
213 213
214 /* user has chosen a value so keep it */ 214 /* user has chosen a value so keep it */
215 if (br->flags & BR_SET_MAC_ADDR) 215 if (br->flags & BR_SET_MAC_ADDR)
216 return; 216 return false;
217 217
218 list_for_each_entry(p, &br->port_list, list) { 218 list_for_each_entry(p, &br->port_list, list) {
219 if (addr == br_mac_zero || 219 if (addr == br_mac_zero ||
@@ -222,8 +222,11 @@ void br_stp_recalculate_bridge_id(struct net_bridge *br)
222 222
223 } 223 }
224 224
225 if (compare_ether_addr(br->bridge_id.addr, addr)) 225 if (compare_ether_addr(br->bridge_id.addr, addr) == 0)
226 br_stp_change_bridge_id(br, addr); 226 return false; /* no change */
227
228 br_stp_change_bridge_id(br, addr);
229 return true;
227} 230}
228 231
229/* called under bridge lock */ 232/* called under bridge lock */
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 702be5a2c956..733d66f1b05a 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -95,7 +95,7 @@ struct s_pstats can_pstats; /* receive list statistics */
95 * af_can socket functions 95 * af_can socket functions
96 */ 96 */
97 97
98static int can_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 98int can_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
99{ 99{
100 struct sock *sk = sock->sk; 100 struct sock *sk = sock->sk;
101 101
@@ -108,6 +108,7 @@ static int can_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
108 return -ENOIOCTLCMD; 108 return -ENOIOCTLCMD;
109 } 109 }
110} 110}
111EXPORT_SYMBOL(can_ioctl);
111 112
112static void can_sock_destruct(struct sock *sk) 113static void can_sock_destruct(struct sock *sk)
113{ 114{
@@ -698,13 +699,9 @@ int can_proto_register(struct can_proto *cp)
698 printk(KERN_ERR "can: protocol %d already registered\n", 699 printk(KERN_ERR "can: protocol %d already registered\n",
699 proto); 700 proto);
700 err = -EBUSY; 701 err = -EBUSY;
701 } else { 702 } else
702 proto_tab[proto] = cp; 703 proto_tab[proto] = cp;
703 704
704 /* use generic ioctl function if not defined by module */
705 if (!cp->ops->ioctl)
706 cp->ops->ioctl = can_ioctl;
707 }
708 spin_unlock(&proto_tab_lock); 705 spin_unlock(&proto_tab_lock);
709 706
710 if (err < 0) 707 if (err < 0)
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 092dc88a7c64..871a0ad51025 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1569,7 +1569,7 @@ static int bcm_recvmsg(struct kiocb *iocb, struct socket *sock,
1569 return size; 1569 return size;
1570} 1570}
1571 1571
1572static struct proto_ops bcm_ops __read_mostly = { 1572static const struct proto_ops bcm_ops = {
1573 .family = PF_CAN, 1573 .family = PF_CAN,
1574 .release = bcm_release, 1574 .release = bcm_release,
1575 .bind = sock_no_bind, 1575 .bind = sock_no_bind,
@@ -1578,7 +1578,7 @@ static struct proto_ops bcm_ops __read_mostly = {
1578 .accept = sock_no_accept, 1578 .accept = sock_no_accept,
1579 .getname = sock_no_getname, 1579 .getname = sock_no_getname,
1580 .poll = datagram_poll, 1580 .poll = datagram_poll,
1581 .ioctl = NULL, /* use can_ioctl() from af_can.c */ 1581 .ioctl = can_ioctl, /* use can_ioctl() from af_can.c */
1582 .listen = sock_no_listen, 1582 .listen = sock_no_listen,
1583 .shutdown = sock_no_shutdown, 1583 .shutdown = sock_no_shutdown,
1584 .setsockopt = sock_no_setsockopt, 1584 .setsockopt = sock_no_setsockopt,
diff --git a/net/can/raw.c b/net/can/raw.c
index 883e9d74fddf..649acfa7c70a 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -742,7 +742,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct socket *sock,
742 return size; 742 return size;
743} 743}
744 744
745static struct proto_ops raw_ops __read_mostly = { 745static const struct proto_ops raw_ops = {
746 .family = PF_CAN, 746 .family = PF_CAN,
747 .release = raw_release, 747 .release = raw_release,
748 .bind = raw_bind, 748 .bind = raw_bind,
@@ -751,7 +751,7 @@ static struct proto_ops raw_ops __read_mostly = {
751 .accept = sock_no_accept, 751 .accept = sock_no_accept,
752 .getname = raw_getname, 752 .getname = raw_getname,
753 .poll = datagram_poll, 753 .poll = datagram_poll,
754 .ioctl = NULL, /* use can_ioctl() from af_can.c */ 754 .ioctl = can_ioctl, /* use can_ioctl() from af_can.c */
755 .listen = sock_no_listen, 755 .listen = sock_no_listen,
756 .shutdown = sock_no_shutdown, 756 .shutdown = sock_no_shutdown,
757 .setsockopt = raw_setsockopt, 757 .setsockopt = raw_setsockopt,
diff --git a/net/ceph/armor.c b/net/ceph/armor.c
index eb2a666b0be7..1fc1ee11dfa2 100644
--- a/net/ceph/armor.c
+++ b/net/ceph/armor.c
@@ -78,8 +78,10 @@ int ceph_unarmor(char *dst, const char *src, const char *end)
78 while (src < end) { 78 while (src < end) {
79 int a, b, c, d; 79 int a, b, c, d;
80 80
81 if (src < end && src[0] == '\n') 81 if (src[0] == '\n') {
82 src++; 82 src++;
83 continue;
84 }
83 if (src + 4 > end) 85 if (src + 4 > end)
84 return -EINVAL; 86 return -EINVAL;
85 a = decode_bits(src[0]); 87 a = decode_bits(src[0]);
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index f3e4a13fea0c..95f96ab94bba 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -62,6 +62,7 @@ const char *ceph_msg_type_name(int type)
62 case CEPH_MSG_OSD_MAP: return "osd_map"; 62 case CEPH_MSG_OSD_MAP: return "osd_map";
63 case CEPH_MSG_OSD_OP: return "osd_op"; 63 case CEPH_MSG_OSD_OP: return "osd_op";
64 case CEPH_MSG_OSD_OPREPLY: return "osd_opreply"; 64 case CEPH_MSG_OSD_OPREPLY: return "osd_opreply";
65 case CEPH_MSG_WATCH_NOTIFY: return "watch_notify";
65 default: return "unknown"; 66 default: return "unknown";
66 } 67 }
67} 68}
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 3e20a122ffa2..02212ed50852 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -22,10 +22,15 @@
22#define OSD_OPREPLY_FRONT_LEN 512 22#define OSD_OPREPLY_FRONT_LEN 512
23 23
24static const struct ceph_connection_operations osd_con_ops; 24static const struct ceph_connection_operations osd_con_ops;
25static int __kick_requests(struct ceph_osd_client *osdc,
26 struct ceph_osd *kickosd);
27 25
28static void kick_requests(struct ceph_osd_client *osdc, struct ceph_osd *osd); 26static void send_queued(struct ceph_osd_client *osdc);
27static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd);
28static void __register_request(struct ceph_osd_client *osdc,
29 struct ceph_osd_request *req);
30static void __unregister_linger_request(struct ceph_osd_client *osdc,
31 struct ceph_osd_request *req);
32static int __send_request(struct ceph_osd_client *osdc,
33 struct ceph_osd_request *req);
29 34
30static int op_needs_trail(int op) 35static int op_needs_trail(int op)
31{ 36{
@@ -34,6 +39,7 @@ static int op_needs_trail(int op)
34 case CEPH_OSD_OP_SETXATTR: 39 case CEPH_OSD_OP_SETXATTR:
35 case CEPH_OSD_OP_CMPXATTR: 40 case CEPH_OSD_OP_CMPXATTR:
36 case CEPH_OSD_OP_CALL: 41 case CEPH_OSD_OP_CALL:
42 case CEPH_OSD_OP_NOTIFY:
37 return 1; 43 return 1;
38 default: 44 default:
39 return 0; 45 return 0;
@@ -209,6 +215,8 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
209 init_completion(&req->r_completion); 215 init_completion(&req->r_completion);
210 init_completion(&req->r_safe_completion); 216 init_completion(&req->r_safe_completion);
211 INIT_LIST_HEAD(&req->r_unsafe_item); 217 INIT_LIST_HEAD(&req->r_unsafe_item);
218 INIT_LIST_HEAD(&req->r_linger_item);
219 INIT_LIST_HEAD(&req->r_linger_osd);
212 req->r_flags = flags; 220 req->r_flags = flags;
213 221
214 WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0); 222 WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0);
@@ -315,6 +323,24 @@ static void osd_req_encode_op(struct ceph_osd_request *req,
315 break; 323 break;
316 case CEPH_OSD_OP_STARTSYNC: 324 case CEPH_OSD_OP_STARTSYNC:
317 break; 325 break;
326 case CEPH_OSD_OP_NOTIFY:
327 {
328 __le32 prot_ver = cpu_to_le32(src->watch.prot_ver);
329 __le32 timeout = cpu_to_le32(src->watch.timeout);
330
331 BUG_ON(!req->r_trail);
332
333 ceph_pagelist_append(req->r_trail,
334 &prot_ver, sizeof(prot_ver));
335 ceph_pagelist_append(req->r_trail,
336 &timeout, sizeof(timeout));
337 }
338 case CEPH_OSD_OP_NOTIFY_ACK:
339 case CEPH_OSD_OP_WATCH:
340 dst->watch.cookie = cpu_to_le64(src->watch.cookie);
341 dst->watch.ver = cpu_to_le64(src->watch.ver);
342 dst->watch.flag = src->watch.flag;
343 break;
318 default: 344 default:
319 pr_err("unrecognized osd opcode %d\n", dst->op); 345 pr_err("unrecognized osd opcode %d\n", dst->op);
320 WARN_ON(1); 346 WARN_ON(1);
@@ -529,6 +555,45 @@ __lookup_request_ge(struct ceph_osd_client *osdc,
529 return NULL; 555 return NULL;
530} 556}
531 557
558/*
559 * Resubmit requests pending on the given osd.
560 */
561static void __kick_osd_requests(struct ceph_osd_client *osdc,
562 struct ceph_osd *osd)
563{
564 struct ceph_osd_request *req, *nreq;
565 int err;
566
567 dout("__kick_osd_requests osd%d\n", osd->o_osd);
568 err = __reset_osd(osdc, osd);
569 if (err == -EAGAIN)
570 return;
571
572 list_for_each_entry(req, &osd->o_requests, r_osd_item) {
573 list_move(&req->r_req_lru_item, &osdc->req_unsent);
574 dout("requeued %p tid %llu osd%d\n", req, req->r_tid,
575 osd->o_osd);
576 if (!req->r_linger)
577 req->r_flags |= CEPH_OSD_FLAG_RETRY;
578 }
579
580 list_for_each_entry_safe(req, nreq, &osd->o_linger_requests,
581 r_linger_osd) {
582 __unregister_linger_request(osdc, req);
583 __register_request(osdc, req);
584 list_move(&req->r_req_lru_item, &osdc->req_unsent);
585 dout("requeued lingering %p tid %llu osd%d\n", req, req->r_tid,
586 osd->o_osd);
587 }
588}
589
590static void kick_osd_requests(struct ceph_osd_client *osdc,
591 struct ceph_osd *kickosd)
592{
593 mutex_lock(&osdc->request_mutex);
594 __kick_osd_requests(osdc, kickosd);
595 mutex_unlock(&osdc->request_mutex);
596}
532 597
533/* 598/*
534 * If the osd connection drops, we need to resubmit all requests. 599 * If the osd connection drops, we need to resubmit all requests.
@@ -543,7 +608,8 @@ static void osd_reset(struct ceph_connection *con)
543 dout("osd_reset osd%d\n", osd->o_osd); 608 dout("osd_reset osd%d\n", osd->o_osd);
544 osdc = osd->o_osdc; 609 osdc = osd->o_osdc;
545 down_read(&osdc->map_sem); 610 down_read(&osdc->map_sem);
546 kick_requests(osdc, osd); 611 kick_osd_requests(osdc, osd);
612 send_queued(osdc);
547 up_read(&osdc->map_sem); 613 up_read(&osdc->map_sem);
548} 614}
549 615
@@ -561,6 +627,7 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc)
561 atomic_set(&osd->o_ref, 1); 627 atomic_set(&osd->o_ref, 1);
562 osd->o_osdc = osdc; 628 osd->o_osdc = osdc;
563 INIT_LIST_HEAD(&osd->o_requests); 629 INIT_LIST_HEAD(&osd->o_requests);
630 INIT_LIST_HEAD(&osd->o_linger_requests);
564 INIT_LIST_HEAD(&osd->o_osd_lru); 631 INIT_LIST_HEAD(&osd->o_osd_lru);
565 osd->o_incarnation = 1; 632 osd->o_incarnation = 1;
566 633
@@ -650,7 +717,8 @@ static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
650 int ret = 0; 717 int ret = 0;
651 718
652 dout("__reset_osd %p osd%d\n", osd, osd->o_osd); 719 dout("__reset_osd %p osd%d\n", osd, osd->o_osd);
653 if (list_empty(&osd->o_requests)) { 720 if (list_empty(&osd->o_requests) &&
721 list_empty(&osd->o_linger_requests)) {
654 __remove_osd(osdc, osd); 722 __remove_osd(osdc, osd);
655 } else if (memcmp(&osdc->osdmap->osd_addr[osd->o_osd], 723 } else if (memcmp(&osdc->osdmap->osd_addr[osd->o_osd],
656 &osd->o_con.peer_addr, 724 &osd->o_con.peer_addr,
@@ -723,10 +791,9 @@ static void __cancel_osd_timeout(struct ceph_osd_client *osdc)
723 * Register request, assign tid. If this is the first request, set up 791 * Register request, assign tid. If this is the first request, set up
724 * the timeout event. 792 * the timeout event.
725 */ 793 */
726static void register_request(struct ceph_osd_client *osdc, 794static void __register_request(struct ceph_osd_client *osdc,
727 struct ceph_osd_request *req) 795 struct ceph_osd_request *req)
728{ 796{
729 mutex_lock(&osdc->request_mutex);
730 req->r_tid = ++osdc->last_tid; 797 req->r_tid = ++osdc->last_tid;
731 req->r_request->hdr.tid = cpu_to_le64(req->r_tid); 798 req->r_request->hdr.tid = cpu_to_le64(req->r_tid);
732 INIT_LIST_HEAD(&req->r_req_lru_item); 799 INIT_LIST_HEAD(&req->r_req_lru_item);
@@ -740,6 +807,13 @@ static void register_request(struct ceph_osd_client *osdc,
740 dout(" first request, scheduling timeout\n"); 807 dout(" first request, scheduling timeout\n");
741 __schedule_osd_timeout(osdc); 808 __schedule_osd_timeout(osdc);
742 } 809 }
810}
811
812static void register_request(struct ceph_osd_client *osdc,
813 struct ceph_osd_request *req)
814{
815 mutex_lock(&osdc->request_mutex);
816 __register_request(osdc, req);
743 mutex_unlock(&osdc->request_mutex); 817 mutex_unlock(&osdc->request_mutex);
744} 818}
745 819
@@ -758,9 +832,14 @@ static void __unregister_request(struct ceph_osd_client *osdc,
758 ceph_con_revoke(&req->r_osd->o_con, req->r_request); 832 ceph_con_revoke(&req->r_osd->o_con, req->r_request);
759 833
760 list_del_init(&req->r_osd_item); 834 list_del_init(&req->r_osd_item);
761 if (list_empty(&req->r_osd->o_requests)) 835 if (list_empty(&req->r_osd->o_requests) &&
836 list_empty(&req->r_osd->o_linger_requests)) {
837 dout("moving osd to %p lru\n", req->r_osd);
762 __move_osd_to_lru(osdc, req->r_osd); 838 __move_osd_to_lru(osdc, req->r_osd);
763 req->r_osd = NULL; 839 }
840 if (list_empty(&req->r_osd_item) &&
841 list_empty(&req->r_linger_item))
842 req->r_osd = NULL;
764 } 843 }
765 844
766 ceph_osdc_put_request(req); 845 ceph_osdc_put_request(req);
@@ -781,20 +860,72 @@ static void __cancel_request(struct ceph_osd_request *req)
781 ceph_con_revoke(&req->r_osd->o_con, req->r_request); 860 ceph_con_revoke(&req->r_osd->o_con, req->r_request);
782 req->r_sent = 0; 861 req->r_sent = 0;
783 } 862 }
784 list_del_init(&req->r_req_lru_item);
785} 863}
786 864
865static void __register_linger_request(struct ceph_osd_client *osdc,
866 struct ceph_osd_request *req)
867{
868 dout("__register_linger_request %p\n", req);
869 list_add_tail(&req->r_linger_item, &osdc->req_linger);
870 list_add_tail(&req->r_linger_osd, &req->r_osd->o_linger_requests);
871}
872
873static void __unregister_linger_request(struct ceph_osd_client *osdc,
874 struct ceph_osd_request *req)
875{
876 dout("__unregister_linger_request %p\n", req);
877 if (req->r_osd) {
878 list_del_init(&req->r_linger_item);
879 list_del_init(&req->r_linger_osd);
880
881 if (list_empty(&req->r_osd->o_requests) &&
882 list_empty(&req->r_osd->o_linger_requests)) {
883 dout("moving osd to %p lru\n", req->r_osd);
884 __move_osd_to_lru(osdc, req->r_osd);
885 }
886 req->r_osd = NULL;
887 }
888}
889
890void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc,
891 struct ceph_osd_request *req)
892{
893 mutex_lock(&osdc->request_mutex);
894 if (req->r_linger) {
895 __unregister_linger_request(osdc, req);
896 ceph_osdc_put_request(req);
897 }
898 mutex_unlock(&osdc->request_mutex);
899}
900EXPORT_SYMBOL(ceph_osdc_unregister_linger_request);
901
902void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc,
903 struct ceph_osd_request *req)
904{
905 if (!req->r_linger) {
906 dout("set_request_linger %p\n", req);
907 req->r_linger = 1;
908 /*
909 * caller is now responsible for calling
910 * unregister_linger_request
911 */
912 ceph_osdc_get_request(req);
913 }
914}
915EXPORT_SYMBOL(ceph_osdc_set_request_linger);
916
787/* 917/*
788 * Pick an osd (the first 'up' osd in the pg), allocate the osd struct 918 * Pick an osd (the first 'up' osd in the pg), allocate the osd struct
789 * (as needed), and set the request r_osd appropriately. If there is 919 * (as needed), and set the request r_osd appropriately. If there is
790 * no up osd, set r_osd to NULL. 920 * no up osd, set r_osd to NULL. Move the request to the appropiate list
921 * (unsent, homeless) or leave on in-flight lru.
791 * 922 *
792 * Return 0 if unchanged, 1 if changed, or negative on error. 923 * Return 0 if unchanged, 1 if changed, or negative on error.
793 * 924 *
794 * Caller should hold map_sem for read and request_mutex. 925 * Caller should hold map_sem for read and request_mutex.
795 */ 926 */
796static int __map_osds(struct ceph_osd_client *osdc, 927static int __map_request(struct ceph_osd_client *osdc,
797 struct ceph_osd_request *req) 928 struct ceph_osd_request *req)
798{ 929{
799 struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; 930 struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base;
800 struct ceph_pg pgid; 931 struct ceph_pg pgid;
@@ -802,11 +933,13 @@ static int __map_osds(struct ceph_osd_client *osdc,
802 int o = -1, num = 0; 933 int o = -1, num = 0;
803 int err; 934 int err;
804 935
805 dout("map_osds %p tid %lld\n", req, req->r_tid); 936 dout("map_request %p tid %lld\n", req, req->r_tid);
806 err = ceph_calc_object_layout(&reqhead->layout, req->r_oid, 937 err = ceph_calc_object_layout(&reqhead->layout, req->r_oid,
807 &req->r_file_layout, osdc->osdmap); 938 &req->r_file_layout, osdc->osdmap);
808 if (err) 939 if (err) {
940 list_move(&req->r_req_lru_item, &osdc->req_notarget);
809 return err; 941 return err;
942 }
810 pgid = reqhead->layout.ol_pgid; 943 pgid = reqhead->layout.ol_pgid;
811 req->r_pgid = pgid; 944 req->r_pgid = pgid;
812 945
@@ -823,7 +956,7 @@ static int __map_osds(struct ceph_osd_client *osdc,
823 (req->r_osd == NULL && o == -1)) 956 (req->r_osd == NULL && o == -1))
824 return 0; /* no change */ 957 return 0; /* no change */
825 958
826 dout("map_osds tid %llu pgid %d.%x osd%d (was osd%d)\n", 959 dout("map_request tid %llu pgid %d.%x osd%d (was osd%d)\n",
827 req->r_tid, le32_to_cpu(pgid.pool), le16_to_cpu(pgid.ps), o, 960 req->r_tid, le32_to_cpu(pgid.pool), le16_to_cpu(pgid.ps), o,
828 req->r_osd ? req->r_osd->o_osd : -1); 961 req->r_osd ? req->r_osd->o_osd : -1);
829 962
@@ -841,10 +974,12 @@ static int __map_osds(struct ceph_osd_client *osdc,
841 if (!req->r_osd && o >= 0) { 974 if (!req->r_osd && o >= 0) {
842 err = -ENOMEM; 975 err = -ENOMEM;
843 req->r_osd = create_osd(osdc); 976 req->r_osd = create_osd(osdc);
844 if (!req->r_osd) 977 if (!req->r_osd) {
978 list_move(&req->r_req_lru_item, &osdc->req_notarget);
845 goto out; 979 goto out;
980 }
846 981
847 dout("map_osds osd %p is osd%d\n", req->r_osd, o); 982 dout("map_request osd %p is osd%d\n", req->r_osd, o);
848 req->r_osd->o_osd = o; 983 req->r_osd->o_osd = o;
849 req->r_osd->o_con.peer_name.num = cpu_to_le64(o); 984 req->r_osd->o_con.peer_name.num = cpu_to_le64(o);
850 __insert_osd(osdc, req->r_osd); 985 __insert_osd(osdc, req->r_osd);
@@ -855,6 +990,9 @@ static int __map_osds(struct ceph_osd_client *osdc,
855 if (req->r_osd) { 990 if (req->r_osd) {
856 __remove_osd_from_lru(req->r_osd); 991 __remove_osd_from_lru(req->r_osd);
857 list_add(&req->r_osd_item, &req->r_osd->o_requests); 992 list_add(&req->r_osd_item, &req->r_osd->o_requests);
993 list_move(&req->r_req_lru_item, &osdc->req_unsent);
994 } else {
995 list_move(&req->r_req_lru_item, &osdc->req_notarget);
858 } 996 }
859 err = 1; /* osd or pg changed */ 997 err = 1; /* osd or pg changed */
860 998
@@ -869,16 +1007,6 @@ static int __send_request(struct ceph_osd_client *osdc,
869 struct ceph_osd_request *req) 1007 struct ceph_osd_request *req)
870{ 1008{
871 struct ceph_osd_request_head *reqhead; 1009 struct ceph_osd_request_head *reqhead;
872 int err;
873
874 err = __map_osds(osdc, req);
875 if (err < 0)
876 return err;
877 if (req->r_osd == NULL) {
878 dout("send_request %p no up osds in pg\n", req);
879 ceph_monc_request_next_osdmap(&osdc->client->monc);
880 return 0;
881 }
882 1010
883 dout("send_request %p tid %llu to osd%d flags %d\n", 1011 dout("send_request %p tid %llu to osd%d flags %d\n",
884 req, req->r_tid, req->r_osd->o_osd, req->r_flags); 1012 req, req->r_tid, req->r_osd->o_osd, req->r_flags);
@@ -898,6 +1026,21 @@ static int __send_request(struct ceph_osd_client *osdc,
898} 1026}
899 1027
900/* 1028/*
1029 * Send any requests in the queue (req_unsent).
1030 */
1031static void send_queued(struct ceph_osd_client *osdc)
1032{
1033 struct ceph_osd_request *req, *tmp;
1034
1035 dout("send_queued\n");
1036 mutex_lock(&osdc->request_mutex);
1037 list_for_each_entry_safe(req, tmp, &osdc->req_unsent, r_req_lru_item) {
1038 __send_request(osdc, req);
1039 }
1040 mutex_unlock(&osdc->request_mutex);
1041}
1042
1043/*
901 * Timeout callback, called every N seconds when 1 or more osd 1044 * Timeout callback, called every N seconds when 1 or more osd
902 * requests has been active for more than N seconds. When this 1045 * requests has been active for more than N seconds. When this
903 * happens, we ping all OSDs with requests who have timed out to 1046 * happens, we ping all OSDs with requests who have timed out to
@@ -916,30 +1059,13 @@ static void handle_timeout(struct work_struct *work)
916 unsigned long keepalive = 1059 unsigned long keepalive =
917 osdc->client->options->osd_keepalive_timeout * HZ; 1060 osdc->client->options->osd_keepalive_timeout * HZ;
918 unsigned long last_stamp = 0; 1061 unsigned long last_stamp = 0;
919 struct rb_node *p;
920 struct list_head slow_osds; 1062 struct list_head slow_osds;
921
922 dout("timeout\n"); 1063 dout("timeout\n");
923 down_read(&osdc->map_sem); 1064 down_read(&osdc->map_sem);
924 1065
925 ceph_monc_request_next_osdmap(&osdc->client->monc); 1066 ceph_monc_request_next_osdmap(&osdc->client->monc);
926 1067
927 mutex_lock(&osdc->request_mutex); 1068 mutex_lock(&osdc->request_mutex);
928 for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
929 req = rb_entry(p, struct ceph_osd_request, r_node);
930
931 if (req->r_resend) {
932 int err;
933
934 dout("osdc resending prev failed %lld\n", req->r_tid);
935 err = __send_request(osdc, req);
936 if (err)
937 dout("osdc failed again on %lld\n", req->r_tid);
938 else
939 req->r_resend = false;
940 continue;
941 }
942 }
943 1069
944 /* 1070 /*
945 * reset osds that appear to be _really_ unresponsive. this 1071 * reset osds that appear to be _really_ unresponsive. this
@@ -963,7 +1089,7 @@ static void handle_timeout(struct work_struct *work)
963 BUG_ON(!osd); 1089 BUG_ON(!osd);
964 pr_warning(" tid %llu timed out on osd%d, will reset osd\n", 1090 pr_warning(" tid %llu timed out on osd%d, will reset osd\n",
965 req->r_tid, osd->o_osd); 1091 req->r_tid, osd->o_osd);
966 __kick_requests(osdc, osd); 1092 __kick_osd_requests(osdc, osd);
967 } 1093 }
968 1094
969 /* 1095 /*
@@ -991,7 +1117,7 @@ static void handle_timeout(struct work_struct *work)
991 1117
992 __schedule_osd_timeout(osdc); 1118 __schedule_osd_timeout(osdc);
993 mutex_unlock(&osdc->request_mutex); 1119 mutex_unlock(&osdc->request_mutex);
994 1120 send_queued(osdc);
995 up_read(&osdc->map_sem); 1121 up_read(&osdc->map_sem);
996} 1122}
997 1123
@@ -1035,7 +1161,6 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
1035 numops * sizeof(struct ceph_osd_op)) 1161 numops * sizeof(struct ceph_osd_op))
1036 goto bad; 1162 goto bad;
1037 dout("handle_reply %p tid %llu result %d\n", msg, tid, (int)result); 1163 dout("handle_reply %p tid %llu result %d\n", msg, tid, (int)result);
1038
1039 /* lookup */ 1164 /* lookup */
1040 mutex_lock(&osdc->request_mutex); 1165 mutex_lock(&osdc->request_mutex);
1041 req = __lookup_request(osdc, tid); 1166 req = __lookup_request(osdc, tid);
@@ -1079,6 +1204,9 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
1079 1204
1080 dout("handle_reply tid %llu flags %d\n", tid, flags); 1205 dout("handle_reply tid %llu flags %d\n", tid, flags);
1081 1206
1207 if (req->r_linger && (flags & CEPH_OSD_FLAG_ONDISK))
1208 __register_linger_request(osdc, req);
1209
1082 /* either this is a read, or we got the safe response */ 1210 /* either this is a read, or we got the safe response */
1083 if (result < 0 || 1211 if (result < 0 ||
1084 (flags & CEPH_OSD_FLAG_ONDISK) || 1212 (flags & CEPH_OSD_FLAG_ONDISK) ||
@@ -1099,6 +1227,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
1099 } 1227 }
1100 1228
1101done: 1229done:
1230 dout("req=%p req->r_linger=%d\n", req, req->r_linger);
1102 ceph_osdc_put_request(req); 1231 ceph_osdc_put_request(req);
1103 return; 1232 return;
1104 1233
@@ -1109,108 +1238,83 @@ bad:
1109 ceph_msg_dump(msg); 1238 ceph_msg_dump(msg);
1110} 1239}
1111 1240
1112 1241static void reset_changed_osds(struct ceph_osd_client *osdc)
1113static int __kick_requests(struct ceph_osd_client *osdc,
1114 struct ceph_osd *kickosd)
1115{ 1242{
1116 struct ceph_osd_request *req;
1117 struct rb_node *p, *n; 1243 struct rb_node *p, *n;
1118 int needmap = 0;
1119 int err;
1120 1244
1121 dout("kick_requests osd%d\n", kickosd ? kickosd->o_osd : -1); 1245 for (p = rb_first(&osdc->osds); p; p = n) {
1122 if (kickosd) { 1246 struct ceph_osd *osd = rb_entry(p, struct ceph_osd, o_node);
1123 err = __reset_osd(osdc, kickosd); 1247
1124 if (err == -EAGAIN) 1248 n = rb_next(p);
1125 return 1; 1249 if (!ceph_osd_is_up(osdc->osdmap, osd->o_osd) ||
1126 } else { 1250 memcmp(&osd->o_con.peer_addr,
1127 for (p = rb_first(&osdc->osds); p; p = n) { 1251 ceph_osd_addr(osdc->osdmap,
1128 struct ceph_osd *osd = 1252 osd->o_osd),
1129 rb_entry(p, struct ceph_osd, o_node); 1253 sizeof(struct ceph_entity_addr)) != 0)
1130 1254 __reset_osd(osdc, osd);
1131 n = rb_next(p);
1132 if (!ceph_osd_is_up(osdc->osdmap, osd->o_osd) ||
1133 memcmp(&osd->o_con.peer_addr,
1134 ceph_osd_addr(osdc->osdmap,
1135 osd->o_osd),
1136 sizeof(struct ceph_entity_addr)) != 0)
1137 __reset_osd(osdc, osd);
1138 }
1139 } 1255 }
1256}
1257
1258/*
1259 * Requeue requests whose mapping to an OSD has changed. If requests map to
1260 * no osd, request a new map.
1261 *
1262 * Caller should hold map_sem for read and request_mutex.
1263 */
1264static void kick_requests(struct ceph_osd_client *osdc)
1265{
1266 struct ceph_osd_request *req, *nreq;
1267 struct rb_node *p;
1268 int needmap = 0;
1269 int err;
1140 1270
1271 dout("kick_requests\n");
1272 mutex_lock(&osdc->request_mutex);
1141 for (p = rb_first(&osdc->requests); p; p = rb_next(p)) { 1273 for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
1142 req = rb_entry(p, struct ceph_osd_request, r_node); 1274 req = rb_entry(p, struct ceph_osd_request, r_node);
1143 1275 err = __map_request(osdc, req);
1144 if (req->r_resend) { 1276 if (err < 0)
1145 dout(" r_resend set on tid %llu\n", req->r_tid); 1277 continue; /* error */
1146 __cancel_request(req); 1278 if (req->r_osd == NULL) {
1147 goto kick; 1279 dout("%p tid %llu maps to no osd\n", req, req->r_tid);
1148 } 1280 needmap++; /* request a newer map */
1149 if (req->r_osd && kickosd == req->r_osd) { 1281 } else if (err > 0) {
1150 __cancel_request(req); 1282 dout("%p tid %llu requeued on osd%d\n", req, req->r_tid,
1151 goto kick; 1283 req->r_osd ? req->r_osd->o_osd : -1);
1284 if (!req->r_linger)
1285 req->r_flags |= CEPH_OSD_FLAG_RETRY;
1152 } 1286 }
1287 }
1288
1289 list_for_each_entry_safe(req, nreq, &osdc->req_linger,
1290 r_linger_item) {
1291 dout("linger req=%p req->r_osd=%p\n", req, req->r_osd);
1153 1292
1154 err = __map_osds(osdc, req); 1293 err = __map_request(osdc, req);
1155 if (err == 0) 1294 if (err == 0)
1156 continue; /* no change */ 1295 continue; /* no change and no osd was specified */
1157 if (err < 0) { 1296 if (err < 0)
1158 /* 1297 continue; /* hrm! */
1159 * FIXME: really, we should set the request
1160 * error and fail if this isn't a 'nofail'
1161 * request, but that's a fair bit more
1162 * complicated to do. So retry!
1163 */
1164 dout(" setting r_resend on %llu\n", req->r_tid);
1165 req->r_resend = true;
1166 continue;
1167 }
1168 if (req->r_osd == NULL) { 1298 if (req->r_osd == NULL) {
1169 dout("tid %llu maps to no valid osd\n", req->r_tid); 1299 dout("tid %llu maps to no valid osd\n", req->r_tid);
1170 needmap++; /* request a newer map */ 1300 needmap++; /* request a newer map */
1171 continue; 1301 continue;
1172 } 1302 }
1173 1303
1174kick: 1304 dout("kicking lingering %p tid %llu osd%d\n", req, req->r_tid,
1175 dout("kicking %p tid %llu osd%d\n", req, req->r_tid,
1176 req->r_osd ? req->r_osd->o_osd : -1); 1305 req->r_osd ? req->r_osd->o_osd : -1);
1177 req->r_flags |= CEPH_OSD_FLAG_RETRY; 1306 __unregister_linger_request(osdc, req);
1178 err = __send_request(osdc, req); 1307 __register_request(osdc, req);
1179 if (err) {
1180 dout(" setting r_resend on %llu\n", req->r_tid);
1181 req->r_resend = true;
1182 }
1183 } 1308 }
1184
1185 return needmap;
1186}
1187
1188/*
1189 * Resubmit osd requests whose osd or osd address has changed. Request
1190 * a new osd map if osds are down, or we are otherwise unable to determine
1191 * how to direct a request.
1192 *
1193 * Close connections to down osds.
1194 *
1195 * If @who is specified, resubmit requests for that specific osd.
1196 *
1197 * Caller should hold map_sem for read and request_mutex.
1198 */
1199static void kick_requests(struct ceph_osd_client *osdc,
1200 struct ceph_osd *kickosd)
1201{
1202 int needmap;
1203
1204 mutex_lock(&osdc->request_mutex);
1205 needmap = __kick_requests(osdc, kickosd);
1206 mutex_unlock(&osdc->request_mutex); 1309 mutex_unlock(&osdc->request_mutex);
1207 1310
1208 if (needmap) { 1311 if (needmap) {
1209 dout("%d requests for down osds, need new map\n", needmap); 1312 dout("%d requests for down osds, need new map\n", needmap);
1210 ceph_monc_request_next_osdmap(&osdc->client->monc); 1313 ceph_monc_request_next_osdmap(&osdc->client->monc);
1211 } 1314 }
1212
1213} 1315}
1316
1317
1214/* 1318/*
1215 * Process updated osd map. 1319 * Process updated osd map.
1216 * 1320 *
@@ -1263,6 +1367,8 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
1263 ceph_osdmap_destroy(osdc->osdmap); 1367 ceph_osdmap_destroy(osdc->osdmap);
1264 osdc->osdmap = newmap; 1368 osdc->osdmap = newmap;
1265 } 1369 }
1370 kick_requests(osdc);
1371 reset_changed_osds(osdc);
1266 } else { 1372 } else {
1267 dout("ignoring incremental map %u len %d\n", 1373 dout("ignoring incremental map %u len %d\n",
1268 epoch, maplen); 1374 epoch, maplen);
@@ -1300,6 +1406,7 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
1300 osdc->osdmap = newmap; 1406 osdc->osdmap = newmap;
1301 if (oldmap) 1407 if (oldmap)
1302 ceph_osdmap_destroy(oldmap); 1408 ceph_osdmap_destroy(oldmap);
1409 kick_requests(osdc);
1303 } 1410 }
1304 p += maplen; 1411 p += maplen;
1305 nr_maps--; 1412 nr_maps--;
@@ -1308,8 +1415,7 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
1308done: 1415done:
1309 downgrade_write(&osdc->map_sem); 1416 downgrade_write(&osdc->map_sem);
1310 ceph_monc_got_osdmap(&osdc->client->monc, osdc->osdmap->epoch); 1417 ceph_monc_got_osdmap(&osdc->client->monc, osdc->osdmap->epoch);
1311 if (newmap) 1418 send_queued(osdc);
1312 kick_requests(osdc, NULL);
1313 up_read(&osdc->map_sem); 1419 up_read(&osdc->map_sem);
1314 wake_up_all(&osdc->client->auth_wq); 1420 wake_up_all(&osdc->client->auth_wq);
1315 return; 1421 return;
@@ -1322,6 +1428,223 @@ bad:
1322} 1428}
1323 1429
1324/* 1430/*
1431 * watch/notify callback event infrastructure
1432 *
1433 * These callbacks are used both for watch and notify operations.
1434 */
1435static void __release_event(struct kref *kref)
1436{
1437 struct ceph_osd_event *event =
1438 container_of(kref, struct ceph_osd_event, kref);
1439
1440 dout("__release_event %p\n", event);
1441 kfree(event);
1442}
1443
1444static void get_event(struct ceph_osd_event *event)
1445{
1446 kref_get(&event->kref);
1447}
1448
1449void ceph_osdc_put_event(struct ceph_osd_event *event)
1450{
1451 kref_put(&event->kref, __release_event);
1452}
1453EXPORT_SYMBOL(ceph_osdc_put_event);
1454
1455static void __insert_event(struct ceph_osd_client *osdc,
1456 struct ceph_osd_event *new)
1457{
1458 struct rb_node **p = &osdc->event_tree.rb_node;
1459 struct rb_node *parent = NULL;
1460 struct ceph_osd_event *event = NULL;
1461
1462 while (*p) {
1463 parent = *p;
1464 event = rb_entry(parent, struct ceph_osd_event, node);
1465 if (new->cookie < event->cookie)
1466 p = &(*p)->rb_left;
1467 else if (new->cookie > event->cookie)
1468 p = &(*p)->rb_right;
1469 else
1470 BUG();
1471 }
1472
1473 rb_link_node(&new->node, parent, p);
1474 rb_insert_color(&new->node, &osdc->event_tree);
1475}
1476
1477static struct ceph_osd_event *__find_event(struct ceph_osd_client *osdc,
1478 u64 cookie)
1479{
1480 struct rb_node **p = &osdc->event_tree.rb_node;
1481 struct rb_node *parent = NULL;
1482 struct ceph_osd_event *event = NULL;
1483
1484 while (*p) {
1485 parent = *p;
1486 event = rb_entry(parent, struct ceph_osd_event, node);
1487 if (cookie < event->cookie)
1488 p = &(*p)->rb_left;
1489 else if (cookie > event->cookie)
1490 p = &(*p)->rb_right;
1491 else
1492 return event;
1493 }
1494 return NULL;
1495}
1496
1497static void __remove_event(struct ceph_osd_event *event)
1498{
1499 struct ceph_osd_client *osdc = event->osdc;
1500
1501 if (!RB_EMPTY_NODE(&event->node)) {
1502 dout("__remove_event removed %p\n", event);
1503 rb_erase(&event->node, &osdc->event_tree);
1504 ceph_osdc_put_event(event);
1505 } else {
1506 dout("__remove_event didn't remove %p\n", event);
1507 }
1508}
1509
1510int ceph_osdc_create_event(struct ceph_osd_client *osdc,
1511 void (*event_cb)(u64, u64, u8, void *),
1512 int one_shot, void *data,
1513 struct ceph_osd_event **pevent)
1514{
1515 struct ceph_osd_event *event;
1516
1517 event = kmalloc(sizeof(*event), GFP_NOIO);
1518 if (!event)
1519 return -ENOMEM;
1520
1521 dout("create_event %p\n", event);
1522 event->cb = event_cb;
1523 event->one_shot = one_shot;
1524 event->data = data;
1525 event->osdc = osdc;
1526 INIT_LIST_HEAD(&event->osd_node);
1527 kref_init(&event->kref); /* one ref for us */
1528 kref_get(&event->kref); /* one ref for the caller */
1529 init_completion(&event->completion);
1530
1531 spin_lock(&osdc->event_lock);
1532 event->cookie = ++osdc->event_count;
1533 __insert_event(osdc, event);
1534 spin_unlock(&osdc->event_lock);
1535
1536 *pevent = event;
1537 return 0;
1538}
1539EXPORT_SYMBOL(ceph_osdc_create_event);
1540
1541void ceph_osdc_cancel_event(struct ceph_osd_event *event)
1542{
1543 struct ceph_osd_client *osdc = event->osdc;
1544
1545 dout("cancel_event %p\n", event);
1546 spin_lock(&osdc->event_lock);
1547 __remove_event(event);
1548 spin_unlock(&osdc->event_lock);
1549 ceph_osdc_put_event(event); /* caller's */
1550}
1551EXPORT_SYMBOL(ceph_osdc_cancel_event);
1552
1553
1554static void do_event_work(struct work_struct *work)
1555{
1556 struct ceph_osd_event_work *event_work =
1557 container_of(work, struct ceph_osd_event_work, work);
1558 struct ceph_osd_event *event = event_work->event;
1559 u64 ver = event_work->ver;
1560 u64 notify_id = event_work->notify_id;
1561 u8 opcode = event_work->opcode;
1562
1563 dout("do_event_work completing %p\n", event);
1564 event->cb(ver, notify_id, opcode, event->data);
1565 complete(&event->completion);
1566 dout("do_event_work completed %p\n", event);
1567 ceph_osdc_put_event(event);
1568 kfree(event_work);
1569}
1570
1571
1572/*
1573 * Process osd watch notifications
1574 */
1575void handle_watch_notify(struct ceph_osd_client *osdc, struct ceph_msg *msg)
1576{
1577 void *p, *end;
1578 u8 proto_ver;
1579 u64 cookie, ver, notify_id;
1580 u8 opcode;
1581 struct ceph_osd_event *event;
1582 struct ceph_osd_event_work *event_work;
1583
1584 p = msg->front.iov_base;
1585 end = p + msg->front.iov_len;
1586
1587 ceph_decode_8_safe(&p, end, proto_ver, bad);
1588 ceph_decode_8_safe(&p, end, opcode, bad);
1589 ceph_decode_64_safe(&p, end, cookie, bad);
1590 ceph_decode_64_safe(&p, end, ver, bad);
1591 ceph_decode_64_safe(&p, end, notify_id, bad);
1592
1593 spin_lock(&osdc->event_lock);
1594 event = __find_event(osdc, cookie);
1595 if (event) {
1596 get_event(event);
1597 if (event->one_shot)
1598 __remove_event(event);
1599 }
1600 spin_unlock(&osdc->event_lock);
1601 dout("handle_watch_notify cookie %lld ver %lld event %p\n",
1602 cookie, ver, event);
1603 if (event) {
1604 event_work = kmalloc(sizeof(*event_work), GFP_NOIO);
1605 INIT_WORK(&event_work->work, do_event_work);
1606 if (!event_work) {
1607 dout("ERROR: could not allocate event_work\n");
1608 goto done_err;
1609 }
1610 event_work->event = event;
1611 event_work->ver = ver;
1612 event_work->notify_id = notify_id;
1613 event_work->opcode = opcode;
1614 if (!queue_work(osdc->notify_wq, &event_work->work)) {
1615 dout("WARNING: failed to queue notify event work\n");
1616 goto done_err;
1617 }
1618 }
1619
1620 return;
1621
1622done_err:
1623 complete(&event->completion);
1624 ceph_osdc_put_event(event);
1625 return;
1626
1627bad:
1628 pr_err("osdc handle_watch_notify corrupt msg\n");
1629 return;
1630}
1631
1632int ceph_osdc_wait_event(struct ceph_osd_event *event, unsigned long timeout)
1633{
1634 int err;
1635
1636 dout("wait_event %p\n", event);
1637 err = wait_for_completion_interruptible_timeout(&event->completion,
1638 timeout * HZ);
1639 ceph_osdc_put_event(event);
1640 if (err > 0)
1641 err = 0;
1642 dout("wait_event %p returns %d\n", event, err);
1643 return err;
1644}
1645EXPORT_SYMBOL(ceph_osdc_wait_event);
1646
1647/*
1325 * Register request, send initial attempt. 1648 * Register request, send initial attempt.
1326 */ 1649 */
1327int ceph_osdc_start_request(struct ceph_osd_client *osdc, 1650int ceph_osdc_start_request(struct ceph_osd_client *osdc,
@@ -1347,15 +1670,22 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
1347 * the request still han't been touched yet. 1670 * the request still han't been touched yet.
1348 */ 1671 */
1349 if (req->r_sent == 0) { 1672 if (req->r_sent == 0) {
1350 rc = __send_request(osdc, req); 1673 rc = __map_request(osdc, req);
1351 if (rc) { 1674 if (rc < 0)
1352 if (nofail) { 1675 return rc;
1353 dout("osdc_start_request failed send, " 1676 if (req->r_osd == NULL) {
1354 " marking %lld\n", req->r_tid); 1677 dout("send_request %p no up osds in pg\n", req);
1355 req->r_resend = true; 1678 ceph_monc_request_next_osdmap(&osdc->client->monc);
1356 rc = 0; 1679 } else {
1357 } else { 1680 rc = __send_request(osdc, req);
1358 __unregister_request(osdc, req); 1681 if (rc) {
1682 if (nofail) {
1683 dout("osdc_start_request failed send, "
1684 " will retry %lld\n", req->r_tid);
1685 rc = 0;
1686 } else {
1687 __unregister_request(osdc, req);
1688 }
1359 } 1689 }
1360 } 1690 }
1361 } 1691 }
@@ -1441,9 +1771,15 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
1441 INIT_LIST_HEAD(&osdc->osd_lru); 1771 INIT_LIST_HEAD(&osdc->osd_lru);
1442 osdc->requests = RB_ROOT; 1772 osdc->requests = RB_ROOT;
1443 INIT_LIST_HEAD(&osdc->req_lru); 1773 INIT_LIST_HEAD(&osdc->req_lru);
1774 INIT_LIST_HEAD(&osdc->req_unsent);
1775 INIT_LIST_HEAD(&osdc->req_notarget);
1776 INIT_LIST_HEAD(&osdc->req_linger);
1444 osdc->num_requests = 0; 1777 osdc->num_requests = 0;
1445 INIT_DELAYED_WORK(&osdc->timeout_work, handle_timeout); 1778 INIT_DELAYED_WORK(&osdc->timeout_work, handle_timeout);
1446 INIT_DELAYED_WORK(&osdc->osds_timeout_work, handle_osds_timeout); 1779 INIT_DELAYED_WORK(&osdc->osds_timeout_work, handle_osds_timeout);
1780 spin_lock_init(&osdc->event_lock);
1781 osdc->event_tree = RB_ROOT;
1782 osdc->event_count = 0;
1447 1783
1448 schedule_delayed_work(&osdc->osds_timeout_work, 1784 schedule_delayed_work(&osdc->osds_timeout_work,
1449 round_jiffies_relative(osdc->client->options->osd_idle_ttl * HZ)); 1785 round_jiffies_relative(osdc->client->options->osd_idle_ttl * HZ));
@@ -1463,6 +1799,13 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
1463 "osd_op_reply"); 1799 "osd_op_reply");
1464 if (err < 0) 1800 if (err < 0)
1465 goto out_msgpool; 1801 goto out_msgpool;
1802
1803 osdc->notify_wq = create_singlethread_workqueue("ceph-watch-notify");
1804 if (IS_ERR(osdc->notify_wq)) {
1805 err = PTR_ERR(osdc->notify_wq);
1806 osdc->notify_wq = NULL;
1807 goto out_msgpool;
1808 }
1466 return 0; 1809 return 0;
1467 1810
1468out_msgpool: 1811out_msgpool:
@@ -1476,6 +1819,8 @@ EXPORT_SYMBOL(ceph_osdc_init);
1476 1819
1477void ceph_osdc_stop(struct ceph_osd_client *osdc) 1820void ceph_osdc_stop(struct ceph_osd_client *osdc)
1478{ 1821{
1822 flush_workqueue(osdc->notify_wq);
1823 destroy_workqueue(osdc->notify_wq);
1479 cancel_delayed_work_sync(&osdc->timeout_work); 1824 cancel_delayed_work_sync(&osdc->timeout_work);
1480 cancel_delayed_work_sync(&osdc->osds_timeout_work); 1825 cancel_delayed_work_sync(&osdc->osds_timeout_work);
1481 if (osdc->osdmap) { 1826 if (osdc->osdmap) {
@@ -1483,6 +1828,7 @@ void ceph_osdc_stop(struct ceph_osd_client *osdc)
1483 osdc->osdmap = NULL; 1828 osdc->osdmap = NULL;
1484 } 1829 }
1485 remove_old_osds(osdc, 1); 1830 remove_old_osds(osdc, 1);
1831 WARN_ON(!RB_EMPTY_ROOT(&osdc->osds));
1486 mempool_destroy(osdc->req_mempool); 1832 mempool_destroy(osdc->req_mempool);
1487 ceph_msgpool_destroy(&osdc->msgpool_op); 1833 ceph_msgpool_destroy(&osdc->msgpool_op);
1488 ceph_msgpool_destroy(&osdc->msgpool_op_reply); 1834 ceph_msgpool_destroy(&osdc->msgpool_op_reply);
@@ -1591,6 +1937,9 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
1591 case CEPH_MSG_OSD_OPREPLY: 1937 case CEPH_MSG_OSD_OPREPLY:
1592 handle_reply(osdc, msg, con); 1938 handle_reply(osdc, msg, con);
1593 break; 1939 break;
1940 case CEPH_MSG_WATCH_NOTIFY:
1941 handle_watch_notify(osdc, msg);
1942 break;
1594 1943
1595 default: 1944 default:
1596 pr_err("received unknown message type %d %s\n", type, 1945 pr_err("received unknown message type %d %s\n", type,
@@ -1684,6 +2033,7 @@ static struct ceph_msg *alloc_msg(struct ceph_connection *con,
1684 2033
1685 switch (type) { 2034 switch (type) {
1686 case CEPH_MSG_OSD_MAP: 2035 case CEPH_MSG_OSD_MAP:
2036 case CEPH_MSG_WATCH_NOTIFY:
1687 return ceph_msg_new(type, front, GFP_NOFS); 2037 return ceph_msg_new(type, front, GFP_NOFS);
1688 case CEPH_MSG_OSD_OPREPLY: 2038 case CEPH_MSG_OSD_OPREPLY:
1689 return get_reply(con, hdr, skip); 2039 return get_reply(con, hdr, skip);
diff --git a/net/core/dev.c b/net/core/dev.c
index 0b88eba97dab..3da9fb06d47a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1140,9 +1140,6 @@ static int __dev_open(struct net_device *dev)
1140 1140
1141 ASSERT_RTNL(); 1141 ASSERT_RTNL();
1142 1142
1143 /*
1144 * Is it even present?
1145 */
1146 if (!netif_device_present(dev)) 1143 if (!netif_device_present(dev))
1147 return -ENODEV; 1144 return -ENODEV;
1148 1145
@@ -1151,9 +1148,6 @@ static int __dev_open(struct net_device *dev)
1151 if (ret) 1148 if (ret)
1152 return ret; 1149 return ret;
1153 1150
1154 /*
1155 * Call device private open method
1156 */
1157 set_bit(__LINK_STATE_START, &dev->state); 1151 set_bit(__LINK_STATE_START, &dev->state);
1158 1152
1159 if (ops->ndo_validate_addr) 1153 if (ops->ndo_validate_addr)
@@ -1162,31 +1156,12 @@ static int __dev_open(struct net_device *dev)
1162 if (!ret && ops->ndo_open) 1156 if (!ret && ops->ndo_open)
1163 ret = ops->ndo_open(dev); 1157 ret = ops->ndo_open(dev);
1164 1158
1165 /*
1166 * If it went open OK then:
1167 */
1168
1169 if (ret) 1159 if (ret)
1170 clear_bit(__LINK_STATE_START, &dev->state); 1160 clear_bit(__LINK_STATE_START, &dev->state);
1171 else { 1161 else {
1172 /*
1173 * Set the flags.
1174 */
1175 dev->flags |= IFF_UP; 1162 dev->flags |= IFF_UP;
1176
1177 /*
1178 * Enable NET_DMA
1179 */
1180 net_dmaengine_get(); 1163 net_dmaengine_get();
1181
1182 /*
1183 * Initialize multicasting status
1184 */
1185 dev_set_rx_mode(dev); 1164 dev_set_rx_mode(dev);
1186
1187 /*
1188 * Wakeup transmit queue engine
1189 */
1190 dev_activate(dev); 1165 dev_activate(dev);
1191 } 1166 }
1192 1167
@@ -1209,22 +1184,13 @@ int dev_open(struct net_device *dev)
1209{ 1184{
1210 int ret; 1185 int ret;
1211 1186
1212 /*
1213 * Is it already up?
1214 */
1215 if (dev->flags & IFF_UP) 1187 if (dev->flags & IFF_UP)
1216 return 0; 1188 return 0;
1217 1189
1218 /*
1219 * Open device
1220 */
1221 ret = __dev_open(dev); 1190 ret = __dev_open(dev);
1222 if (ret < 0) 1191 if (ret < 0)
1223 return ret; 1192 return ret;
1224 1193
1225 /*
1226 * ... and announce new interface.
1227 */
1228 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); 1194 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
1229 call_netdevice_notifiers(NETDEV_UP, dev); 1195 call_netdevice_notifiers(NETDEV_UP, dev);
1230 1196
@@ -1240,10 +1206,6 @@ static int __dev_close_many(struct list_head *head)
1240 might_sleep(); 1206 might_sleep();
1241 1207
1242 list_for_each_entry(dev, head, unreg_list) { 1208 list_for_each_entry(dev, head, unreg_list) {
1243 /*
1244 * Tell people we are going down, so that they can
1245 * prepare to death, when device is still operating.
1246 */
1247 call_netdevice_notifiers(NETDEV_GOING_DOWN, dev); 1209 call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1248 1210
1249 clear_bit(__LINK_STATE_START, &dev->state); 1211 clear_bit(__LINK_STATE_START, &dev->state);
@@ -1272,15 +1234,7 @@ static int __dev_close_many(struct list_head *head)
1272 if (ops->ndo_stop) 1234 if (ops->ndo_stop)
1273 ops->ndo_stop(dev); 1235 ops->ndo_stop(dev);
1274 1236
1275 /*
1276 * Device is now down.
1277 */
1278
1279 dev->flags &= ~IFF_UP; 1237 dev->flags &= ~IFF_UP;
1280
1281 /*
1282 * Shutdown NET_DMA
1283 */
1284 net_dmaengine_put(); 1238 net_dmaengine_put();
1285 } 1239 }
1286 1240
@@ -1309,9 +1263,6 @@ static int dev_close_many(struct list_head *head)
1309 1263
1310 __dev_close_many(head); 1264 __dev_close_many(head);
1311 1265
1312 /*
1313 * Tell people we are down
1314 */
1315 list_for_each_entry(dev, head, unreg_list) { 1266 list_for_each_entry(dev, head, unreg_list) {
1316 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); 1267 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
1317 call_netdevice_notifiers(NETDEV_DOWN, dev); 1268 call_netdevice_notifiers(NETDEV_DOWN, dev);
@@ -1353,14 +1304,17 @@ EXPORT_SYMBOL(dev_close);
1353 */ 1304 */
1354void dev_disable_lro(struct net_device *dev) 1305void dev_disable_lro(struct net_device *dev)
1355{ 1306{
1356 if (dev->ethtool_ops && dev->ethtool_ops->get_flags && 1307 u32 flags;
1357 dev->ethtool_ops->set_flags) { 1308
1358 u32 flags = dev->ethtool_ops->get_flags(dev); 1309 if (dev->ethtool_ops && dev->ethtool_ops->get_flags)
1359 if (flags & ETH_FLAG_LRO) { 1310 flags = dev->ethtool_ops->get_flags(dev);
1360 flags &= ~ETH_FLAG_LRO; 1311 else
1361 dev->ethtool_ops->set_flags(dev, flags); 1312 flags = ethtool_op_get_flags(dev);
1362 } 1313
1363 } 1314 if (!(flags & ETH_FLAG_LRO))
1315 return;
1316
1317 __ethtool_set_flags(dev, flags & ~ETH_FLAG_LRO);
1364 WARN_ON(dev->features & NETIF_F_LRO); 1318 WARN_ON(dev->features & NETIF_F_LRO);
1365} 1319}
1366EXPORT_SYMBOL(dev_disable_lro); 1320EXPORT_SYMBOL(dev_disable_lro);
@@ -1368,11 +1322,6 @@ EXPORT_SYMBOL(dev_disable_lro);
1368 1322
1369static int dev_boot_phase = 1; 1323static int dev_boot_phase = 1;
1370 1324
1371/*
1372 * Device change register/unregister. These are not inline or static
1373 * as we export them to the world.
1374 */
1375
1376/** 1325/**
1377 * register_netdevice_notifier - register a network notifier block 1326 * register_netdevice_notifier - register a network notifier block
1378 * @nb: notifier 1327 * @nb: notifier
@@ -1474,6 +1423,7 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1474 ASSERT_RTNL(); 1423 ASSERT_RTNL();
1475 return raw_notifier_call_chain(&netdev_chain, val, dev); 1424 return raw_notifier_call_chain(&netdev_chain, val, dev);
1476} 1425}
1426EXPORT_SYMBOL(call_netdevice_notifiers);
1477 1427
1478/* When > 0 there are consumers of rx skb time stamps */ 1428/* When > 0 there are consumers of rx skb time stamps */
1479static atomic_t netstamp_needed = ATOMIC_INIT(0); 1429static atomic_t netstamp_needed = ATOMIC_INIT(0);
@@ -1504,6 +1454,27 @@ static inline void net_timestamp_check(struct sk_buff *skb)
1504 __net_timestamp(skb); 1454 __net_timestamp(skb);
1505} 1455}
1506 1456
1457static inline bool is_skb_forwardable(struct net_device *dev,
1458 struct sk_buff *skb)
1459{
1460 unsigned int len;
1461
1462 if (!(dev->flags & IFF_UP))
1463 return false;
1464
1465 len = dev->mtu + dev->hard_header_len + VLAN_HLEN;
1466 if (skb->len <= len)
1467 return true;
1468
1469 /* if TSO is enabled, we don't care about the length as the packet
1470 * could be forwarded without being segmented before
1471 */
1472 if (skb_is_gso(skb))
1473 return true;
1474
1475 return false;
1476}
1477
1507/** 1478/**
1508 * dev_forward_skb - loopback an skb to another netif 1479 * dev_forward_skb - loopback an skb to another netif
1509 * 1480 *
@@ -1527,8 +1498,7 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1527 skb_orphan(skb); 1498 skb_orphan(skb);
1528 nf_reset(skb); 1499 nf_reset(skb);
1529 1500
1530 if (unlikely(!(dev->flags & IFF_UP) || 1501 if (unlikely(!is_skb_forwardable(dev, skb))) {
1531 (skb->len > (dev->mtu + dev->hard_header_len + VLAN_HLEN)))) {
1532 atomic_long_inc(&dev->rx_dropped); 1502 atomic_long_inc(&dev->rx_dropped);
1533 kfree_skb(skb); 1503 kfree_skb(skb);
1534 return NET_RX_DROP; 1504 return NET_RX_DROP;
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 36e603c78ce9..706502ff64aa 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -350,7 +350,7 @@ static int __init init_net_drop_monitor(void)
350 struct per_cpu_dm_data *data; 350 struct per_cpu_dm_data *data;
351 int cpu, rc; 351 int cpu, rc;
352 352
353 printk(KERN_INFO "Initalizing network drop monitor service\n"); 353 printk(KERN_INFO "Initializing network drop monitor service\n");
354 354
355 if (sizeof(void *) > 8) { 355 if (sizeof(void *) > 8) {
356 printk(KERN_ERR "Unable to store program counters on this arch, Drop monitor failed\n"); 356 printk(KERN_ERR "Unable to store program counters on this arch, Drop monitor failed\n");
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index a1086fb0c0c7..74ead9eca126 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -141,9 +141,24 @@ u32 ethtool_op_get_flags(struct net_device *dev)
141} 141}
142EXPORT_SYMBOL(ethtool_op_get_flags); 142EXPORT_SYMBOL(ethtool_op_get_flags);
143 143
144/* Check if device can enable (or disable) particular feature coded in "data"
145 * argument. Flags "supported" describe features that can be toggled by device.
146 * If feature can not be toggled, it state (enabled or disabled) must match
147 * hardcoded device features state, otherwise flags are marked as invalid.
148 */
149bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported)
150{
151 u32 features = dev->features & flags_dup_features;
152 /* "data" can contain only flags_dup_features bits,
153 * see __ethtool_set_flags */
154
155 return (features & ~supported) != (data & ~supported);
156}
157EXPORT_SYMBOL(ethtool_invalid_flags);
158
144int ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported) 159int ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported)
145{ 160{
146 if (data & ~supported) 161 if (ethtool_invalid_flags(dev, data, supported))
147 return -EINVAL; 162 return -EINVAL;
148 163
149 dev->features = ((dev->features & ~flags_dup_features) | 164 dev->features = ((dev->features & ~flags_dup_features) |
@@ -513,7 +528,7 @@ static int ethtool_set_one_feature(struct net_device *dev,
513 } 528 }
514} 529}
515 530
516static int __ethtool_set_flags(struct net_device *dev, u32 data) 531int __ethtool_set_flags(struct net_device *dev, u32 data)
517{ 532{
518 u32 changed; 533 u32 changed;
519 534
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 0c55eaa70e39..aeeece72b72f 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3761,7 +3761,10 @@ static int __init pktgen_create_thread(int cpu)
3761 list_add_tail(&t->th_list, &pktgen_threads); 3761 list_add_tail(&t->th_list, &pktgen_threads);
3762 init_completion(&t->start_done); 3762 init_completion(&t->start_done);
3763 3763
3764 p = kthread_create(pktgen_thread_worker, t, "kpktgend_%d", cpu); 3764 p = kthread_create_on_node(pktgen_thread_worker,
3765 t,
3766 cpu_to_node(cpu),
3767 "kpktgend_%d", cpu);
3765 if (IS_ERR(p)) { 3768 if (IS_ERR(p)) {
3766 pr_err("kernel_thread() failed for cpu %d\n", t->cpu); 3769 pr_err("kernel_thread() failed for cpu %d\n", t->cpu);
3767 list_del(&t->th_list); 3770 list_del(&t->th_list);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 090d273d7865..1b74d3b64371 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -215,6 +215,9 @@ int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir)
215 case ARPHRD_INFINIBAND: 215 case ARPHRD_INFINIBAND:
216 ip_ib_mc_map(addr, dev->broadcast, haddr); 216 ip_ib_mc_map(addr, dev->broadcast, haddr);
217 return 0; 217 return 0;
218 case ARPHRD_IPGRE:
219 ip_ipgre_mc_map(addr, dev->broadcast, haddr);
220 return 0;
218 default: 221 default:
219 if (dir) { 222 if (dir) {
220 memcpy(haddr, dev->broadcast, dev->addr_len); 223 memcpy(haddr, dev->broadcast, dev->addr_len);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 6d85800daeb7..5345b0bee6df 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -64,6 +64,8 @@
64#include <net/rtnetlink.h> 64#include <net/rtnetlink.h>
65#include <net/net_namespace.h> 65#include <net/net_namespace.h>
66 66
67#include "fib_lookup.h"
68
67static struct ipv4_devconf ipv4_devconf = { 69static struct ipv4_devconf ipv4_devconf = {
68 .data = { 70 .data = {
69 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1, 71 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
@@ -151,6 +153,20 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
151 break; 153 break;
152 } 154 }
153 } 155 }
156 if (!result) {
157 struct flowi4 fl4 = { .daddr = addr };
158 struct fib_result res = { 0 };
159 struct fib_table *local;
160
161 /* Fallback to FIB local table so that communication
162 * over loopback subnets work.
163 */
164 local = fib_get_table(net, RT_TABLE_LOCAL);
165 if (local &&
166 !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
167 res.type == RTN_LOCAL)
168 result = FIB_RES_DEV(res);
169 }
154 if (result && devref) 170 if (result && devref)
155 dev_hold(result); 171 dev_hold(result);
156 rcu_read_unlock(); 172 rcu_read_unlock();
@@ -345,6 +361,17 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
345 } 361 }
346 } 362 }
347 363
364 /* On promotion all secondaries from subnet are changing
365 * the primary IP, we must remove all their routes silently
366 * and later to add them back with new prefsrc. Do this
367 * while all addresses are on the device list.
368 */
369 for (ifa = promote; ifa; ifa = ifa->ifa_next) {
370 if (ifa1->ifa_mask == ifa->ifa_mask &&
371 inet_ifa_match(ifa1->ifa_address, ifa))
372 fib_del_ifaddr(ifa, ifa1);
373 }
374
348 /* 2. Unlink it */ 375 /* 2. Unlink it */
349 376
350 *ifap = ifa1->ifa_next; 377 *ifap = ifa1->ifa_next;
@@ -364,6 +391,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
364 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1); 391 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
365 392
366 if (promote) { 393 if (promote) {
394 struct in_ifaddr *next_sec = promote->ifa_next;
367 395
368 if (prev_prom) { 396 if (prev_prom) {
369 prev_prom->ifa_next = promote->ifa_next; 397 prev_prom->ifa_next = promote->ifa_next;
@@ -375,7 +403,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
375 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid); 403 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
376 blocking_notifier_call_chain(&inetaddr_chain, 404 blocking_notifier_call_chain(&inetaddr_chain,
377 NETDEV_UP, promote); 405 NETDEV_UP, promote);
378 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) { 406 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
379 if (ifa1->ifa_mask != ifa->ifa_mask || 407 if (ifa1->ifa_mask != ifa->ifa_mask ||
380 !inet_ifa_match(ifa1->ifa_address, ifa)) 408 !inet_ifa_match(ifa1->ifa_address, ifa))
381 continue; 409 continue;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index a373a259253c..451088330bbb 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -228,7 +228,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
228 if (res.type != RTN_LOCAL || !accept_local) 228 if (res.type != RTN_LOCAL || !accept_local)
229 goto e_inval; 229 goto e_inval;
230 } 230 }
231 *spec_dst = FIB_RES_PREFSRC(res); 231 *spec_dst = FIB_RES_PREFSRC(net, res);
232 fib_combine_itag(itag, &res); 232 fib_combine_itag(itag, &res);
233 dev_match = false; 233 dev_match = false;
234 234
@@ -258,7 +258,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
258 ret = 0; 258 ret = 0;
259 if (fib_lookup(net, &fl4, &res) == 0) { 259 if (fib_lookup(net, &fl4, &res) == 0) {
260 if (res.type == RTN_UNICAST) { 260 if (res.type == RTN_UNICAST) {
261 *spec_dst = FIB_RES_PREFSRC(res); 261 *spec_dst = FIB_RES_PREFSRC(net, res);
262 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; 262 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
263 } 263 }
264 } 264 }
@@ -722,12 +722,17 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
722 } 722 }
723} 723}
724 724
725static void fib_del_ifaddr(struct in_ifaddr *ifa) 725/* Delete primary or secondary address.
726 * Optionally, on secondary address promotion consider the addresses
727 * from subnet iprim as deleted, even if they are in device list.
728 * In this case the secondary ifa can be in device list.
729 */
730void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
726{ 731{
727 struct in_device *in_dev = ifa->ifa_dev; 732 struct in_device *in_dev = ifa->ifa_dev;
728 struct net_device *dev = in_dev->dev; 733 struct net_device *dev = in_dev->dev;
729 struct in_ifaddr *ifa1; 734 struct in_ifaddr *ifa1;
730 struct in_ifaddr *prim = ifa; 735 struct in_ifaddr *prim = ifa, *prim1 = NULL;
731 __be32 brd = ifa->ifa_address | ~ifa->ifa_mask; 736 __be32 brd = ifa->ifa_address | ~ifa->ifa_mask;
732 __be32 any = ifa->ifa_address & ifa->ifa_mask; 737 __be32 any = ifa->ifa_address & ifa->ifa_mask;
733#define LOCAL_OK 1 738#define LOCAL_OK 1
@@ -735,17 +740,26 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
735#define BRD0_OK 4 740#define BRD0_OK 4
736#define BRD1_OK 8 741#define BRD1_OK 8
737 unsigned ok = 0; 742 unsigned ok = 0;
743 int subnet = 0; /* Primary network */
744 int gone = 1; /* Address is missing */
745 int same_prefsrc = 0; /* Another primary with same IP */
738 746
739 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) 747 if (ifa->ifa_flags & IFA_F_SECONDARY) {
740 fib_magic(RTM_DELROUTE,
741 dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
742 any, ifa->ifa_prefixlen, prim);
743 else {
744 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask); 748 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
745 if (prim == NULL) { 749 if (prim == NULL) {
746 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n"); 750 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
747 return; 751 return;
748 } 752 }
753 if (iprim && iprim != prim) {
754 printk(KERN_WARNING "fib_del_ifaddr: bug: iprim != prim\n");
755 return;
756 }
757 } else if (!ipv4_is_zeronet(any) &&
758 (any != ifa->ifa_local || ifa->ifa_prefixlen < 32)) {
759 fib_magic(RTM_DELROUTE,
760 dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
761 any, ifa->ifa_prefixlen, prim);
762 subnet = 1;
749 } 763 }
750 764
751 /* Deletion is more complicated than add. 765 /* Deletion is more complicated than add.
@@ -755,6 +769,49 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
755 */ 769 */
756 770
757 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) { 771 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
772 if (ifa1 == ifa) {
773 /* promotion, keep the IP */
774 gone = 0;
775 continue;
776 }
777 /* Ignore IFAs from our subnet */
778 if (iprim && ifa1->ifa_mask == iprim->ifa_mask &&
779 inet_ifa_match(ifa1->ifa_address, iprim))
780 continue;
781
782 /* Ignore ifa1 if it uses different primary IP (prefsrc) */
783 if (ifa1->ifa_flags & IFA_F_SECONDARY) {
784 /* Another address from our subnet? */
785 if (ifa1->ifa_mask == prim->ifa_mask &&
786 inet_ifa_match(ifa1->ifa_address, prim))
787 prim1 = prim;
788 else {
789 /* We reached the secondaries, so
790 * same_prefsrc should be determined.
791 */
792 if (!same_prefsrc)
793 continue;
794 /* Search new prim1 if ifa1 is not
795 * using the current prim1
796 */
797 if (!prim1 ||
798 ifa1->ifa_mask != prim1->ifa_mask ||
799 !inet_ifa_match(ifa1->ifa_address, prim1))
800 prim1 = inet_ifa_byprefix(in_dev,
801 ifa1->ifa_address,
802 ifa1->ifa_mask);
803 if (!prim1)
804 continue;
805 if (prim1->ifa_local != prim->ifa_local)
806 continue;
807 }
808 } else {
809 if (prim->ifa_local != ifa1->ifa_local)
810 continue;
811 prim1 = ifa1;
812 if (prim != prim1)
813 same_prefsrc = 1;
814 }
758 if (ifa->ifa_local == ifa1->ifa_local) 815 if (ifa->ifa_local == ifa1->ifa_local)
759 ok |= LOCAL_OK; 816 ok |= LOCAL_OK;
760 if (ifa->ifa_broadcast == ifa1->ifa_broadcast) 817 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
@@ -763,19 +820,37 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
763 ok |= BRD1_OK; 820 ok |= BRD1_OK;
764 if (any == ifa1->ifa_broadcast) 821 if (any == ifa1->ifa_broadcast)
765 ok |= BRD0_OK; 822 ok |= BRD0_OK;
823 /* primary has network specific broadcasts */
824 if (prim1 == ifa1 && ifa1->ifa_prefixlen < 31) {
825 __be32 brd1 = ifa1->ifa_address | ~ifa1->ifa_mask;
826 __be32 any1 = ifa1->ifa_address & ifa1->ifa_mask;
827
828 if (!ipv4_is_zeronet(any1)) {
829 if (ifa->ifa_broadcast == brd1 ||
830 ifa->ifa_broadcast == any1)
831 ok |= BRD_OK;
832 if (brd == brd1 || brd == any1)
833 ok |= BRD1_OK;
834 if (any == brd1 || any == any1)
835 ok |= BRD0_OK;
836 }
837 }
766 } 838 }
767 839
768 if (!(ok & BRD_OK)) 840 if (!(ok & BRD_OK))
769 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); 841 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
770 if (!(ok & BRD1_OK)) 842 if (subnet && ifa->ifa_prefixlen < 31) {
771 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim); 843 if (!(ok & BRD1_OK))
772 if (!(ok & BRD0_OK)) 844 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
773 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim); 845 if (!(ok & BRD0_OK))
846 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
847 }
774 if (!(ok & LOCAL_OK)) { 848 if (!(ok & LOCAL_OK)) {
775 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim); 849 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
776 850
777 /* Check, that this local address finally disappeared. */ 851 /* Check, that this local address finally disappeared. */
778 if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) { 852 if (gone &&
853 inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
779 /* And the last, but not the least thing. 854 /* And the last, but not the least thing.
780 * We must flush stray FIB entries. 855 * We must flush stray FIB entries.
781 * 856 *
@@ -885,6 +960,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
885{ 960{
886 struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; 961 struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
887 struct net_device *dev = ifa->ifa_dev->dev; 962 struct net_device *dev = ifa->ifa_dev->dev;
963 struct net *net = dev_net(dev);
888 964
889 switch (event) { 965 switch (event) {
890 case NETDEV_UP: 966 case NETDEV_UP:
@@ -892,12 +968,12 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
892#ifdef CONFIG_IP_ROUTE_MULTIPATH 968#ifdef CONFIG_IP_ROUTE_MULTIPATH
893 fib_sync_up(dev); 969 fib_sync_up(dev);
894#endif 970#endif
895 fib_update_nh_saddrs(dev); 971 atomic_inc(&net->ipv4.dev_addr_genid);
896 rt_cache_flush(dev_net(dev), -1); 972 rt_cache_flush(dev_net(dev), -1);
897 break; 973 break;
898 case NETDEV_DOWN: 974 case NETDEV_DOWN:
899 fib_del_ifaddr(ifa); 975 fib_del_ifaddr(ifa, NULL);
900 fib_update_nh_saddrs(dev); 976 atomic_inc(&net->ipv4.dev_addr_genid);
901 if (ifa->ifa_dev->ifa_list == NULL) { 977 if (ifa->ifa_dev->ifa_list == NULL) {
902 /* Last address was deleted from this interface. 978 /* Last address was deleted from this interface.
903 * Disable IP. 979 * Disable IP.
@@ -915,6 +991,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
915{ 991{
916 struct net_device *dev = ptr; 992 struct net_device *dev = ptr;
917 struct in_device *in_dev = __in_dev_get_rtnl(dev); 993 struct in_device *in_dev = __in_dev_get_rtnl(dev);
994 struct net *net = dev_net(dev);
918 995
919 if (event == NETDEV_UNREGISTER) { 996 if (event == NETDEV_UNREGISTER) {
920 fib_disable_ip(dev, 2, -1); 997 fib_disable_ip(dev, 2, -1);
@@ -932,6 +1009,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
932#ifdef CONFIG_IP_ROUTE_MULTIPATH 1009#ifdef CONFIG_IP_ROUTE_MULTIPATH
933 fib_sync_up(dev); 1010 fib_sync_up(dev);
934#endif 1011#endif
1012 atomic_inc(&net->ipv4.dev_addr_genid);
935 rt_cache_flush(dev_net(dev), -1); 1013 rt_cache_flush(dev_net(dev), -1);
936 break; 1014 break;
937 case NETDEV_DOWN: 1015 case NETDEV_DOWN:
@@ -990,6 +1068,7 @@ static void ip_fib_net_exit(struct net *net)
990 fib4_rules_exit(net); 1068 fib4_rules_exit(net);
991#endif 1069#endif
992 1070
1071 rtnl_lock();
993 for (i = 0; i < FIB_TABLE_HASHSZ; i++) { 1072 for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
994 struct fib_table *tb; 1073 struct fib_table *tb;
995 struct hlist_head *head; 1074 struct hlist_head *head;
@@ -1002,6 +1081,7 @@ static void ip_fib_net_exit(struct net *net)
1002 fib_free_table(tb); 1081 fib_free_table(tb);
1003 } 1082 }
1004 } 1083 }
1084 rtnl_unlock();
1005 kfree(net->ipv4.fib_table_hash); 1085 kfree(net->ipv4.fib_table_hash);
1006} 1086}
1007 1087
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index 4ec323875a02..af0f14aba169 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -10,7 +10,6 @@ struct fib_alias {
10 struct fib_info *fa_info; 10 struct fib_info *fa_info;
11 u8 fa_tos; 11 u8 fa_tos;
12 u8 fa_type; 12 u8 fa_type;
13 u8 fa_scope;
14 u8 fa_state; 13 u8 fa_state;
15 struct rcu_head rcu; 14 struct rcu_head rcu;
16}; 15};
@@ -29,7 +28,7 @@ extern void fib_release_info(struct fib_info *);
29extern struct fib_info *fib_create_info(struct fib_config *cfg); 28extern struct fib_info *fib_create_info(struct fib_config *cfg);
30extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi); 29extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi);
31extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, 30extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
32 u32 tb_id, u8 type, u8 scope, __be32 dst, 31 u32 tb_id, u8 type, __be32 dst,
33 int dst_len, u8 tos, struct fib_info *fi, 32 int dst_len, u8 tos, struct fib_info *fi,
34 unsigned int); 33 unsigned int);
35extern void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, 34extern void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 622ac4c95026..641a5a2a9f9c 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -222,7 +222,7 @@ static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
222 unsigned int mask = (fib_info_hash_size - 1); 222 unsigned int mask = (fib_info_hash_size - 1);
223 unsigned int val = fi->fib_nhs; 223 unsigned int val = fi->fib_nhs;
224 224
225 val ^= fi->fib_protocol; 225 val ^= (fi->fib_protocol << 8) | fi->fib_scope;
226 val ^= (__force u32)fi->fib_prefsrc; 226 val ^= (__force u32)fi->fib_prefsrc;
227 val ^= fi->fib_priority; 227 val ^= fi->fib_priority;
228 for_nexthops(fi) { 228 for_nexthops(fi) {
@@ -248,10 +248,11 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi)
248 if (fi->fib_nhs != nfi->fib_nhs) 248 if (fi->fib_nhs != nfi->fib_nhs)
249 continue; 249 continue;
250 if (nfi->fib_protocol == fi->fib_protocol && 250 if (nfi->fib_protocol == fi->fib_protocol &&
251 nfi->fib_scope == fi->fib_scope &&
251 nfi->fib_prefsrc == fi->fib_prefsrc && 252 nfi->fib_prefsrc == fi->fib_prefsrc &&
252 nfi->fib_priority == fi->fib_priority && 253 nfi->fib_priority == fi->fib_priority &&
253 memcmp(nfi->fib_metrics, fi->fib_metrics, 254 memcmp(nfi->fib_metrics, fi->fib_metrics,
254 sizeof(fi->fib_metrics)) == 0 && 255 sizeof(u32) * RTAX_MAX) == 0 &&
255 ((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_F_DEAD) == 0 && 256 ((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_F_DEAD) == 0 &&
256 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0)) 257 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
257 return fi; 258 return fi;
@@ -328,7 +329,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
328 goto errout; 329 goto errout;
329 330
330 err = fib_dump_info(skb, info->pid, seq, event, tb_id, 331 err = fib_dump_info(skb, info->pid, seq, event, tb_id,
331 fa->fa_type, fa->fa_scope, key, dst_len, 332 fa->fa_type, key, dst_len,
332 fa->fa_tos, fa->fa_info, nlm_flags); 333 fa->fa_tos, fa->fa_info, nlm_flags);
333 if (err < 0) { 334 if (err < 0) {
334 /* -EMSGSIZE implies BUG in fib_nlmsg_size() */ 335 /* -EMSGSIZE implies BUG in fib_nlmsg_size() */
@@ -695,6 +696,16 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
695 fib_info_hash_free(old_laddrhash, bytes); 696 fib_info_hash_free(old_laddrhash, bytes);
696} 697}
697 698
699__be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh)
700{
701 nh->nh_saddr = inet_select_addr(nh->nh_dev,
702 nh->nh_gw,
703 nh->nh_parent->fib_scope);
704 nh->nh_saddr_genid = atomic_read(&net->ipv4.dev_addr_genid);
705
706 return nh->nh_saddr;
707}
708
698struct fib_info *fib_create_info(struct fib_config *cfg) 709struct fib_info *fib_create_info(struct fib_config *cfg)
699{ 710{
700 int err; 711 int err;
@@ -753,6 +764,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
753 764
754 fi->fib_net = hold_net(net); 765 fi->fib_net = hold_net(net);
755 fi->fib_protocol = cfg->fc_protocol; 766 fi->fib_protocol = cfg->fc_protocol;
767 fi->fib_scope = cfg->fc_scope;
756 fi->fib_flags = cfg->fc_flags; 768 fi->fib_flags = cfg->fc_flags;
757 fi->fib_priority = cfg->fc_priority; 769 fi->fib_priority = cfg->fc_priority;
758 fi->fib_prefsrc = cfg->fc_prefsrc; 770 fi->fib_prefsrc = cfg->fc_prefsrc;
@@ -854,10 +866,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
854 } 866 }
855 867
856 change_nexthops(fi) { 868 change_nexthops(fi) {
857 nexthop_nh->nh_cfg_scope = cfg->fc_scope; 869 fib_info_update_nh_saddr(net, nexthop_nh);
858 nexthop_nh->nh_saddr = inet_select_addr(nexthop_nh->nh_dev,
859 nexthop_nh->nh_gw,
860 nexthop_nh->nh_cfg_scope);
861 } endfor_nexthops(fi) 870 } endfor_nexthops(fi)
862 871
863link_it: 872link_it:
@@ -906,7 +915,7 @@ failure:
906} 915}
907 916
908int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, 917int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
909 u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos, 918 u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos,
910 struct fib_info *fi, unsigned int flags) 919 struct fib_info *fi, unsigned int flags)
911{ 920{
912 struct nlmsghdr *nlh; 921 struct nlmsghdr *nlh;
@@ -928,7 +937,7 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
928 NLA_PUT_U32(skb, RTA_TABLE, tb_id); 937 NLA_PUT_U32(skb, RTA_TABLE, tb_id);
929 rtm->rtm_type = type; 938 rtm->rtm_type = type;
930 rtm->rtm_flags = fi->fib_flags; 939 rtm->rtm_flags = fi->fib_flags;
931 rtm->rtm_scope = scope; 940 rtm->rtm_scope = fi->fib_scope;
932 rtm->rtm_protocol = fi->fib_protocol; 941 rtm->rtm_protocol = fi->fib_protocol;
933 942
934 if (rtm->rtm_dst_len) 943 if (rtm->rtm_dst_len)
@@ -1084,7 +1093,7 @@ void fib_select_default(struct fib_result *res)
1084 list_for_each_entry_rcu(fa, fa_head, fa_list) { 1093 list_for_each_entry_rcu(fa, fa_head, fa_list) {
1085 struct fib_info *next_fi = fa->fa_info; 1094 struct fib_info *next_fi = fa->fa_info;
1086 1095
1087 if (fa->fa_scope != res->scope || 1096 if (next_fi->fib_scope != res->scope ||
1088 fa->fa_type != RTN_UNICAST) 1097 fa->fa_type != RTN_UNICAST)
1089 continue; 1098 continue;
1090 1099
@@ -1128,24 +1137,6 @@ out:
1128 return; 1137 return;
1129} 1138}
1130 1139
1131void fib_update_nh_saddrs(struct net_device *dev)
1132{
1133 struct hlist_head *head;
1134 struct hlist_node *node;
1135 struct fib_nh *nh;
1136 unsigned int hash;
1137
1138 hash = fib_devindex_hashfn(dev->ifindex);
1139 head = &fib_info_devhash[hash];
1140 hlist_for_each_entry(nh, node, head, nh_hash) {
1141 if (nh->nh_dev != dev)
1142 continue;
1143 nh->nh_saddr = inet_select_addr(nh->nh_dev,
1144 nh->nh_gw,
1145 nh->nh_cfg_scope);
1146 }
1147}
1148
1149#ifdef CONFIG_IP_ROUTE_MULTIPATH 1140#ifdef CONFIG_IP_ROUTE_MULTIPATH
1150 1141
1151/* 1142/*
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 3d28a35c2e1a..b92c86f6e9b3 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1245,7 +1245,6 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
1245 if (fa->fa_info->fib_priority != fi->fib_priority) 1245 if (fa->fa_info->fib_priority != fi->fib_priority)
1246 break; 1246 break;
1247 if (fa->fa_type == cfg->fc_type && 1247 if (fa->fa_type == cfg->fc_type &&
1248 fa->fa_scope == cfg->fc_scope &&
1249 fa->fa_info == fi) { 1248 fa->fa_info == fi) {
1250 fa_match = fa; 1249 fa_match = fa;
1251 break; 1250 break;
@@ -1271,7 +1270,6 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
1271 new_fa->fa_tos = fa->fa_tos; 1270 new_fa->fa_tos = fa->fa_tos;
1272 new_fa->fa_info = fi; 1271 new_fa->fa_info = fi;
1273 new_fa->fa_type = cfg->fc_type; 1272 new_fa->fa_type = cfg->fc_type;
1274 new_fa->fa_scope = cfg->fc_scope;
1275 state = fa->fa_state; 1273 state = fa->fa_state;
1276 new_fa->fa_state = state & ~FA_S_ACCESSED; 1274 new_fa->fa_state = state & ~FA_S_ACCESSED;
1277 1275
@@ -1308,7 +1306,6 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
1308 new_fa->fa_info = fi; 1306 new_fa->fa_info = fi;
1309 new_fa->fa_tos = tos; 1307 new_fa->fa_tos = tos;
1310 new_fa->fa_type = cfg->fc_type; 1308 new_fa->fa_type = cfg->fc_type;
1311 new_fa->fa_scope = cfg->fc_scope;
1312 new_fa->fa_state = 0; 1309 new_fa->fa_state = 0;
1313 /* 1310 /*
1314 * Insert new entry to the list. 1311 * Insert new entry to the list.
@@ -1362,15 +1359,15 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l,
1362 1359
1363 if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) 1360 if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
1364 continue; 1361 continue;
1365 if (fa->fa_scope < flp->flowi4_scope) 1362 if (fa->fa_info->fib_scope < flp->flowi4_scope)
1366 continue; 1363 continue;
1367 fib_alias_accessed(fa); 1364 fib_alias_accessed(fa);
1368 err = fib_props[fa->fa_type].error; 1365 err = fib_props[fa->fa_type].error;
1369 if (err) { 1366 if (err) {
1370#ifdef CONFIG_IP_FIB_TRIE_STATS 1367#ifdef CONFIG_IP_FIB_TRIE_STATS
1371 t->stats.semantic_match_miss++; 1368 t->stats.semantic_match_passed++;
1372#endif 1369#endif
1373 return 1; 1370 return err;
1374 } 1371 }
1375 if (fi->fib_flags & RTNH_F_DEAD) 1372 if (fi->fib_flags & RTNH_F_DEAD)
1376 continue; 1373 continue;
@@ -1388,7 +1385,7 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l,
1388 res->prefixlen = plen; 1385 res->prefixlen = plen;
1389 res->nh_sel = nhsel; 1386 res->nh_sel = nhsel;
1390 res->type = fa->fa_type; 1387 res->type = fa->fa_type;
1391 res->scope = fa->fa_scope; 1388 res->scope = fa->fa_info->fib_scope;
1392 res->fi = fi; 1389 res->fi = fi;
1393 res->table = tb; 1390 res->table = tb;
1394 res->fa_head = &li->falh; 1391 res->fa_head = &li->falh;
@@ -1664,7 +1661,9 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
1664 1661
1665 if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) && 1662 if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
1666 (cfg->fc_scope == RT_SCOPE_NOWHERE || 1663 (cfg->fc_scope == RT_SCOPE_NOWHERE ||
1667 fa->fa_scope == cfg->fc_scope) && 1664 fa->fa_info->fib_scope == cfg->fc_scope) &&
1665 (!cfg->fc_prefsrc ||
1666 fi->fib_prefsrc == cfg->fc_prefsrc) &&
1668 (!cfg->fc_protocol || 1667 (!cfg->fc_protocol ||
1669 fi->fib_protocol == cfg->fc_protocol) && 1668 fi->fib_protocol == cfg->fc_protocol) &&
1670 fib_nh_match(cfg, fi) == 0) { 1669 fib_nh_match(cfg, fi) == 0) {
@@ -1861,7 +1860,6 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
1861 RTM_NEWROUTE, 1860 RTM_NEWROUTE,
1862 tb->tb_id, 1861 tb->tb_id,
1863 fa->fa_type, 1862 fa->fa_type,
1864 fa->fa_scope,
1865 xkey, 1863 xkey,
1866 plen, 1864 plen,
1867 fa->fa_tos, 1865 fa->fa_tos,
@@ -2382,7 +2380,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
2382 seq_indent(seq, iter->depth+1); 2380 seq_indent(seq, iter->depth+1);
2383 seq_printf(seq, " /%d %s %s", li->plen, 2381 seq_printf(seq, " /%d %s %s", li->plen,
2384 rtn_scope(buf1, sizeof(buf1), 2382 rtn_scope(buf1, sizeof(buf1),
2385 fa->fa_scope), 2383 fa->fa_info->fib_scope),
2386 rtn_type(buf2, sizeof(buf2), 2384 rtn_type(buf2, sizeof(buf2),
2387 fa->fa_type)); 2385 fa->fa_type));
2388 if (fa->fa_tos) 2386 if (fa->fa_tos)
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 1906fa35860c..28a736f3442f 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -140,11 +140,11 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
140 } else { 140 } else {
141 dopt->ts_needtime = 0; 141 dopt->ts_needtime = 0;
142 142
143 if (soffset + 8 <= optlen) { 143 if (soffset + 7 <= optlen) {
144 __be32 addr; 144 __be32 addr;
145 145
146 memcpy(&addr, sptr+soffset-1, 4); 146 memcpy(&addr, dptr+soffset-1, 4);
147 if (inet_addr_type(dev_net(skb_dst(skb)->dev), addr) != RTN_LOCAL) { 147 if (inet_addr_type(dev_net(skb_dst(skb)->dev), addr) != RTN_UNICAST) {
148 dopt->ts_needtime = 1; 148 dopt->ts_needtime = 1;
149 soffset += 8; 149 soffset += 8;
150 } 150 }
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index f3c0b549b8e1..4614babdc45f 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -221,9 +221,10 @@ static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
221 return csum; 221 return csum;
222} 222}
223 223
224static int nf_ip_route(struct dst_entry **dst, struct flowi *fl) 224static int nf_ip_route(struct net *net, struct dst_entry **dst,
225 struct flowi *fl, bool strict __always_unused)
225{ 226{
226 struct rtable *rt = ip_route_output_key(&init_net, &fl->u.ip4); 227 struct rtable *rt = ip_route_output_key(net, &fl->u.ip4);
227 if (IS_ERR(rt)) 228 if (IS_ERR(rt))
228 return PTR_ERR(rt); 229 return PTR_ERR(rt);
229 *dst = &rt->dst; 230 *dst = &rt->dst;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index e837ffd3edc3..2d3c72e5bbbf 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -569,6 +569,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
569 rt = ip_route_output_flow(sock_net(sk), &fl4, sk); 569 rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
570 if (IS_ERR(rt)) { 570 if (IS_ERR(rt)) {
571 err = PTR_ERR(rt); 571 err = PTR_ERR(rt);
572 rt = NULL;
572 goto done; 573 goto done;
573 } 574 }
574 } 575 }
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 870b5182ddd8..4b0c81180804 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1593,8 +1593,6 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1593 rt->rt_peer_genid = rt_peer_genid(); 1593 rt->rt_peer_genid = rt_peer_genid();
1594 } 1594 }
1595 check_peer_pmtu(dst, peer); 1595 check_peer_pmtu(dst, peer);
1596
1597 inet_putpeer(peer);
1598 } 1596 }
1599} 1597}
1600 1598
@@ -1720,7 +1718,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
1720 1718
1721 rcu_read_lock(); 1719 rcu_read_lock();
1722 if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0) 1720 if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0)
1723 src = FIB_RES_PREFSRC(res); 1721 src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res);
1724 else 1722 else
1725 src = inet_select_addr(rt->dst.dev, rt->rt_gateway, 1723 src = inet_select_addr(rt->dst.dev, rt->rt_gateway,
1726 RT_SCOPE_UNIVERSE); 1724 RT_SCOPE_UNIVERSE);
@@ -2617,7 +2615,7 @@ static struct rtable *ip_route_output_slow(struct net *net,
2617 fib_select_default(&res); 2615 fib_select_default(&res);
2618 2616
2619 if (!fl4.saddr) 2617 if (!fl4.saddr)
2620 fl4.saddr = FIB_RES_PREFSRC(res); 2618 fl4.saddr = FIB_RES_PREFSRC(net, res);
2621 2619
2622 dev_out = FIB_RES_DEV(res); 2620 dev_out = FIB_RES_DEV(res);
2623 fl4.flowi4_oif = dev_out->ifindex; 2621 fl4.flowi4_oif = dev_out->ifindex;
@@ -3221,6 +3219,8 @@ static __net_init int rt_genid_init(struct net *net)
3221{ 3219{
3222 get_random_bytes(&net->ipv4.rt_genid, 3220 get_random_bytes(&net->ipv4.rt_genid,
3223 sizeof(net->ipv4.rt_genid)); 3221 sizeof(net->ipv4.rt_genid));
3222 get_random_bytes(&net->ipv4.dev_addr_genid,
3223 sizeof(net->ipv4.dev_addr_genid));
3224 return 0; 3224 return 0;
3225} 3225}
3226 3226
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index da782e7ab16d..bef9f04c22ba 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2659,7 +2659,7 @@ static void DBGUNDO(struct sock *sk, const char *msg)
2659#define DBGUNDO(x...) do { } while (0) 2659#define DBGUNDO(x...) do { } while (0)
2660#endif 2660#endif
2661 2661
2662static void tcp_undo_cwr(struct sock *sk, const int undo) 2662static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh)
2663{ 2663{
2664 struct tcp_sock *tp = tcp_sk(sk); 2664 struct tcp_sock *tp = tcp_sk(sk);
2665 2665
@@ -2671,14 +2671,13 @@ static void tcp_undo_cwr(struct sock *sk, const int undo)
2671 else 2671 else
2672 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1); 2672 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1);
2673 2673
2674 if (undo && tp->prior_ssthresh > tp->snd_ssthresh) { 2674 if (undo_ssthresh && tp->prior_ssthresh > tp->snd_ssthresh) {
2675 tp->snd_ssthresh = tp->prior_ssthresh; 2675 tp->snd_ssthresh = tp->prior_ssthresh;
2676 TCP_ECN_withdraw_cwr(tp); 2676 TCP_ECN_withdraw_cwr(tp);
2677 } 2677 }
2678 } else { 2678 } else {
2679 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh); 2679 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh);
2680 } 2680 }
2681 tcp_moderate_cwnd(tp);
2682 tp->snd_cwnd_stamp = tcp_time_stamp; 2681 tp->snd_cwnd_stamp = tcp_time_stamp;
2683} 2682}
2684 2683
@@ -2699,7 +2698,7 @@ static int tcp_try_undo_recovery(struct sock *sk)
2699 * or our original transmission succeeded. 2698 * or our original transmission succeeded.
2700 */ 2699 */
2701 DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans"); 2700 DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
2702 tcp_undo_cwr(sk, 1); 2701 tcp_undo_cwr(sk, true);
2703 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) 2702 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
2704 mib_idx = LINUX_MIB_TCPLOSSUNDO; 2703 mib_idx = LINUX_MIB_TCPLOSSUNDO;
2705 else 2704 else
@@ -2726,7 +2725,7 @@ static void tcp_try_undo_dsack(struct sock *sk)
2726 2725
2727 if (tp->undo_marker && !tp->undo_retrans) { 2726 if (tp->undo_marker && !tp->undo_retrans) {
2728 DBGUNDO(sk, "D-SACK"); 2727 DBGUNDO(sk, "D-SACK");
2729 tcp_undo_cwr(sk, 1); 2728 tcp_undo_cwr(sk, true);
2730 tp->undo_marker = 0; 2729 tp->undo_marker = 0;
2731 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO); 2730 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
2732 } 2731 }
@@ -2779,7 +2778,7 @@ static int tcp_try_undo_partial(struct sock *sk, int acked)
2779 tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1); 2778 tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
2780 2779
2781 DBGUNDO(sk, "Hoe"); 2780 DBGUNDO(sk, "Hoe");
2782 tcp_undo_cwr(sk, 0); 2781 tcp_undo_cwr(sk, false);
2783 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO); 2782 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
2784 2783
2785 /* So... Do not make Hoe's retransmit yet. 2784 /* So... Do not make Hoe's retransmit yet.
@@ -2808,7 +2807,7 @@ static int tcp_try_undo_loss(struct sock *sk)
2808 2807
2809 DBGUNDO(sk, "partial loss"); 2808 DBGUNDO(sk, "partial loss");
2810 tp->lost_out = 0; 2809 tp->lost_out = 0;
2811 tcp_undo_cwr(sk, 1); 2810 tcp_undo_cwr(sk, true);
2812 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO); 2811 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
2813 inet_csk(sk)->icsk_retransmits = 0; 2812 inet_csk(sk)->icsk_retransmits = 0;
2814 tp->undo_marker = 0; 2813 tp->undo_marker = 0;
@@ -2822,8 +2821,11 @@ static int tcp_try_undo_loss(struct sock *sk)
2822static inline void tcp_complete_cwr(struct sock *sk) 2821static inline void tcp_complete_cwr(struct sock *sk)
2823{ 2822{
2824 struct tcp_sock *tp = tcp_sk(sk); 2823 struct tcp_sock *tp = tcp_sk(sk);
2825 tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); 2824 /* Do not moderate cwnd if it's already undone in cwr or recovery */
2826 tp->snd_cwnd_stamp = tcp_time_stamp; 2825 if (tp->undo_marker && tp->snd_cwnd > tp->snd_ssthresh) {
2826 tp->snd_cwnd = tp->snd_ssthresh;
2827 tp->snd_cwnd_stamp = tcp_time_stamp;
2828 }
2827 tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); 2829 tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
2828} 2830}
2829 2831
@@ -3494,7 +3496,7 @@ static void tcp_undo_spur_to_response(struct sock *sk, int flag)
3494 if (flag & FLAG_ECE) 3496 if (flag & FLAG_ECE)
3495 tcp_ratehalving_spur_to_response(sk); 3497 tcp_ratehalving_spur_to_response(sk);
3496 else 3498 else
3497 tcp_undo_cwr(sk, 1); 3499 tcp_undo_cwr(sk, true);
3498} 3500}
3499 3501
3500/* F-RTO spurious RTO detection algorithm (RFC4138) 3502/* F-RTO spurious RTO detection algorithm (RFC4138)
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 7ff0343e05c7..29e48593bf22 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -663,7 +663,7 @@ static int pim6_rcv(struct sk_buff *skb)
663 skb_pull(skb, (u8 *)encap - skb->data); 663 skb_pull(skb, (u8 *)encap - skb->data);
664 skb_reset_network_header(skb); 664 skb_reset_network_header(skb);
665 skb->protocol = htons(ETH_P_IPV6); 665 skb->protocol = htons(ETH_P_IPV6);
666 skb->ip_summed = 0; 666 skb->ip_summed = CHECKSUM_NONE;
667 skb->pkt_type = PACKET_HOST; 667 skb->pkt_type = PACKET_HOST;
668 668
669 skb_tunnel_rx(skb, reg_dev); 669 skb_tunnel_rx(skb, reg_dev);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 0e49c9db3c98..92f952d093db 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -341,6 +341,8 @@ int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int d
341 case ARPHRD_INFINIBAND: 341 case ARPHRD_INFINIBAND:
342 ipv6_ib_mc_map(addr, dev->broadcast, buf); 342 ipv6_ib_mc_map(addr, dev->broadcast, buf);
343 return 0; 343 return 0;
344 case ARPHRD_IPGRE:
345 return ipv6_ipgre_mc_map(addr, dev->broadcast, buf);
344 default: 346 default:
345 if (dir) { 347 if (dir) {
346 memcpy(buf, dev->broadcast, dev->addr_len); 348 memcpy(buf, dev->broadcast, dev->addr_len);
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 39aaca2b4fd2..28bc1f644b7b 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -90,9 +90,18 @@ static int nf_ip6_reroute(struct sk_buff *skb,
90 return 0; 90 return 0;
91} 91}
92 92
93static int nf_ip6_route(struct dst_entry **dst, struct flowi *fl) 93static int nf_ip6_route(struct net *net, struct dst_entry **dst,
94 struct flowi *fl, bool strict)
94{ 95{
95 *dst = ip6_route_output(&init_net, NULL, &fl->u.ip6); 96 static const struct ipv6_pinfo fake_pinfo;
97 static const struct inet_sock fake_sk = {
98 /* makes ip6_route_output set RT6_LOOKUP_F_IFACE: */
99 .sk.sk_bound_dev_if = 1,
100 .pinet6 = (struct ipv6_pinfo *) &fake_pinfo,
101 };
102 const void *sk = strict ? &fake_sk : NULL;
103
104 *dst = ip6_route_output(net, sk, &fl->u.ip6);
96 return (*dst)->error; 105 return (*dst)->error;
97} 106}
98 107
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 6814c8722fa7..843406f14d7b 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -854,7 +854,7 @@ static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table
854 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags); 854 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
855} 855}
856 856
857struct dst_entry * ip6_route_output(struct net *net, struct sock *sk, 857struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
858 struct flowi6 *fl6) 858 struct flowi6 *fl6)
859{ 859{
860 int flags = 0; 860 int flags = 0;
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 7cb65ef79f9c..6dcf5e7d661b 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -17,6 +17,16 @@
17 17
18static struct ctl_table empty[1]; 18static struct ctl_table empty[1];
19 19
20static ctl_table ipv6_static_skeleton[] = {
21 {
22 .procname = "neigh",
23 .maxlen = 0,
24 .mode = 0555,
25 .child = empty,
26 },
27 { }
28};
29
20static ctl_table ipv6_table_template[] = { 30static ctl_table ipv6_table_template[] = {
21 { 31 {
22 .procname = "route", 32 .procname = "route",
@@ -37,12 +47,6 @@ static ctl_table ipv6_table_template[] = {
37 .mode = 0644, 47 .mode = 0644,
38 .proc_handler = proc_dointvec 48 .proc_handler = proc_dointvec
39 }, 49 },
40 {
41 .procname = "neigh",
42 .maxlen = 0,
43 .mode = 0555,
44 .child = empty,
45 },
46 { } 50 { }
47}; 51};
48 52
@@ -160,7 +164,7 @@ static struct ctl_table_header *ip6_base;
160 164
161int ipv6_static_sysctl_register(void) 165int ipv6_static_sysctl_register(void)
162{ 166{
163 ip6_base = register_sysctl_paths(net_ipv6_ctl_path, empty); 167 ip6_base = register_sysctl_paths(net_ipv6_ctl_path, ipv6_static_skeleton);
164 if (ip6_base == NULL) 168 if (ip6_base == NULL)
165 return -ENOMEM; 169 return -ENOMEM;
166 return 0; 170 return 0;
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index 2731b51923d1..9680226640ef 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -148,7 +148,6 @@ static void ipx_destroy_socket(struct sock *sk)
148 ipx_remove_socket(sk); 148 ipx_remove_socket(sk);
149 skb_queue_purge(&sk->sk_receive_queue); 149 skb_queue_purge(&sk->sk_receive_queue);
150 sk_refcnt_debug_dec(sk); 150 sk_refcnt_debug_dec(sk);
151 sock_put(sk);
152} 151}
153 152
154/* 153/*
@@ -1404,6 +1403,7 @@ static int ipx_release(struct socket *sock)
1404 sk_refcnt_debug_release(sk); 1403 sk_refcnt_debug_release(sk);
1405 ipx_destroy_socket(sk); 1404 ipx_destroy_socket(sk);
1406 release_sock(sk); 1405 release_sock(sk);
1406 sock_put(sk);
1407out: 1407out:
1408 return 0; 1408 return 0;
1409} 1409}
diff --git a/net/irda/iriap.c b/net/irda/iriap.c
index 5b743bdd89ba..36477538cea8 100644
--- a/net/irda/iriap.c
+++ b/net/irda/iriap.c
@@ -656,10 +656,16 @@ static void iriap_getvaluebyclass_indication(struct iriap_cb *self,
656 n = 1; 656 n = 1;
657 657
658 name_len = fp[n++]; 658 name_len = fp[n++];
659
660 IRDA_ASSERT(name_len < IAS_MAX_CLASSNAME + 1, return;);
661
659 memcpy(name, fp+n, name_len); n+=name_len; 662 memcpy(name, fp+n, name_len); n+=name_len;
660 name[name_len] = '\0'; 663 name[name_len] = '\0';
661 664
662 attr_len = fp[n++]; 665 attr_len = fp[n++];
666
667 IRDA_ASSERT(attr_len < IAS_MAX_ATTRIBNAME + 1, return;);
668
663 memcpy(attr, fp+n, attr_len); n+=attr_len; 669 memcpy(attr, fp+n, attr_len); n+=attr_len;
664 attr[attr_len] = '\0'; 670 attr[attr_len] = '\0';
665 671
diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c
index 7c567b8aa89a..2bb2beb6a373 100644
--- a/net/irda/irnet/irnet_ppp.c
+++ b/net/irda/irnet/irnet_ppp.c
@@ -105,6 +105,9 @@ irnet_ctrl_write(irnet_socket * ap,
105 while(isspace(start[length - 1])) 105 while(isspace(start[length - 1]))
106 length--; 106 length--;
107 107
108 DABORT(length < 5 || length > NICKNAME_MAX_LEN + 5,
109 -EINVAL, CTRL_ERROR, "Invalid nickname.\n");
110
108 /* Copy the name for later reuse */ 111 /* Copy the name for later reuse */
109 memcpy(ap->rname, start + 5, length - 5); 112 memcpy(ap->rname, start + 5, length - 5);
110 ap->rname[length - 5] = '\0'; 113 ap->rname[length - 5] = '\0';
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 8d9ce0accc98..a8193f52c13c 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -283,7 +283,7 @@ static __net_init int l2tp_eth_init_net(struct net *net)
283 return 0; 283 return 0;
284} 284}
285 285
286static __net_initdata struct pernet_operations l2tp_eth_net_ops = { 286static struct pernet_operations l2tp_eth_net_ops = {
287 .init = l2tp_eth_init_net, 287 .init = l2tp_eth_init_net,
288 .id = &l2tp_eth_net_id, 288 .id = &l2tp_eth_net_id,
289 .size = sizeof(struct l2tp_eth_net), 289 .size = sizeof(struct l2tp_eth_net),
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 8c02469b7176..af3c56482c80 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -342,7 +342,7 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,
342 if (IS_ERR(key->u.ccmp.tfm)) { 342 if (IS_ERR(key->u.ccmp.tfm)) {
343 err = PTR_ERR(key->u.ccmp.tfm); 343 err = PTR_ERR(key->u.ccmp.tfm);
344 kfree(key); 344 kfree(key);
345 key = ERR_PTR(err); 345 return ERR_PTR(err);
346 } 346 }
347 break; 347 break;
348 case WLAN_CIPHER_SUITE_AES_CMAC: 348 case WLAN_CIPHER_SUITE_AES_CMAC:
@@ -360,7 +360,7 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,
360 if (IS_ERR(key->u.aes_cmac.tfm)) { 360 if (IS_ERR(key->u.aes_cmac.tfm)) {
361 err = PTR_ERR(key->u.aes_cmac.tfm); 361 err = PTR_ERR(key->u.aes_cmac.tfm);
362 kfree(key); 362 kfree(key);
363 key = ERR_PTR(err); 363 return ERR_PTR(err);
364 } 364 }
365 break; 365 break;
366 } 366 }
@@ -400,11 +400,12 @@ int ieee80211_key_link(struct ieee80211_key *key,
400{ 400{
401 struct ieee80211_key *old_key; 401 struct ieee80211_key *old_key;
402 int idx, ret; 402 int idx, ret;
403 bool pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE; 403 bool pairwise;
404 404
405 BUG_ON(!sdata); 405 BUG_ON(!sdata);
406 BUG_ON(!key); 406 BUG_ON(!key);
407 407
408 pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE;
408 idx = key->conf.keyidx; 409 idx = key->conf.keyidx;
409 key->local = sdata->local; 410 key->local = sdata->local;
410 key->sdata = sdata; 411 key->sdata = sdata;
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 8212a8bebf06..dbdebeda097f 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -659,18 +659,14 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,
659 struct ieee80211_mcs_info *mcs = &sta->ht_cap.mcs; 659 struct ieee80211_mcs_info *mcs = &sta->ht_cap.mcs;
660 struct ieee80211_local *local = hw_to_local(mp->hw); 660 struct ieee80211_local *local = hw_to_local(mp->hw);
661 u16 sta_cap = sta->ht_cap.cap; 661 u16 sta_cap = sta->ht_cap.cap;
662 int n_supported = 0;
662 int ack_dur; 663 int ack_dur;
663 int stbc; 664 int stbc;
664 int i; 665 int i;
665 666
666 /* fall back to the old minstrel for legacy stations */ 667 /* fall back to the old minstrel for legacy stations */
667 if (!sta->ht_cap.ht_supported) { 668 if (!sta->ht_cap.ht_supported)
668 msp->is_ht = false; 669 goto use_legacy;
669 memset(&msp->legacy, 0, sizeof(msp->legacy));
670 msp->legacy.r = msp->ratelist;
671 msp->legacy.sample_table = msp->sample_table;
672 return mac80211_minstrel.rate_init(priv, sband, sta, &msp->legacy);
673 }
674 670
675 BUILD_BUG_ON(ARRAY_SIZE(minstrel_mcs_groups) != 671 BUILD_BUG_ON(ARRAY_SIZE(minstrel_mcs_groups) !=
676 MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS); 672 MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS);
@@ -725,7 +721,22 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,
725 721
726 mi->groups[i].supported = 722 mi->groups[i].supported =
727 mcs->rx_mask[minstrel_mcs_groups[i].streams - 1]; 723 mcs->rx_mask[minstrel_mcs_groups[i].streams - 1];
724
725 if (mi->groups[i].supported)
726 n_supported++;
728 } 727 }
728
729 if (!n_supported)
730 goto use_legacy;
731
732 return;
733
734use_legacy:
735 msp->is_ht = false;
736 memset(&msp->legacy, 0, sizeof(msp->legacy));
737 msp->legacy.r = msp->ratelist;
738 msp->legacy.sample_table = msp->sample_table;
739 return mac80211_minstrel.rate_init(priv, sband, sta, &msp->legacy);
729} 740}
730 741
731static void 742static void
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 5c1930ba8ebe..aa5cc37b4921 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -612,7 +612,8 @@ static void ieee80211_sta_reorder_release(struct ieee80211_hw *hw,
612 skipped++; 612 skipped++;
613 continue; 613 continue;
614 } 614 }
615 if (!time_after(jiffies, tid_agg_rx->reorder_time[j] + 615 if (skipped &&
616 !time_after(jiffies, tid_agg_rx->reorder_time[j] +
616 HT_RX_REORDER_BUF_TIMEOUT)) 617 HT_RX_REORDER_BUF_TIMEOUT))
617 goto set_release_timer; 618 goto set_release_timer;
618 619
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 5a11078827ab..d0311a322ddd 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -243,6 +243,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
243 memcpy(sta->sta.addr, addr, ETH_ALEN); 243 memcpy(sta->sta.addr, addr, ETH_ALEN);
244 sta->local = local; 244 sta->local = local;
245 sta->sdata = sdata; 245 sta->sdata = sdata;
246 sta->last_rx = jiffies;
246 247
247 ewma_init(&sta->avg_signal, 1024, 8); 248 ewma_init(&sta->avg_signal, 1024, 8);
248 249
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index c3f988aa1152..32bff6d86cb2 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -652,7 +652,6 @@ comment "Xtables matches"
652config NETFILTER_XT_MATCH_ADDRTYPE 652config NETFILTER_XT_MATCH_ADDRTYPE
653 tristate '"addrtype" address type match support' 653 tristate '"addrtype" address type match support'
654 depends on NETFILTER_ADVANCED 654 depends on NETFILTER_ADVANCED
655 depends on (IPV6 || IPV6=n)
656 ---help--- 655 ---help---
657 This option allows you to match what routing thinks of an address, 656 This option allows you to match what routing thinks of an address,
658 eg. UNICAST, LOCAL, BROADCAST, ... 657 eg. UNICAST, LOCAL, BROADCAST, ...
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index 5c48ffb60c28..2dc6de13ac18 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -43,6 +43,8 @@ EXPORT_SYMBOL(register_ip_vs_app);
43EXPORT_SYMBOL(unregister_ip_vs_app); 43EXPORT_SYMBOL(unregister_ip_vs_app);
44EXPORT_SYMBOL(register_ip_vs_app_inc); 44EXPORT_SYMBOL(register_ip_vs_app_inc);
45 45
46static DEFINE_MUTEX(__ip_vs_app_mutex);
47
46/* 48/*
47 * Get an ip_vs_app object 49 * Get an ip_vs_app object
48 */ 50 */
@@ -167,14 +169,13 @@ int
167register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto, 169register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto,
168 __u16 port) 170 __u16 port)
169{ 171{
170 struct netns_ipvs *ipvs = net_ipvs(net);
171 int result; 172 int result;
172 173
173 mutex_lock(&ipvs->app_mutex); 174 mutex_lock(&__ip_vs_app_mutex);
174 175
175 result = ip_vs_app_inc_new(net, app, proto, port); 176 result = ip_vs_app_inc_new(net, app, proto, port);
176 177
177 mutex_unlock(&ipvs->app_mutex); 178 mutex_unlock(&__ip_vs_app_mutex);
178 179
179 return result; 180 return result;
180} 181}
@@ -189,11 +190,11 @@ int register_ip_vs_app(struct net *net, struct ip_vs_app *app)
189 /* increase the module use count */ 190 /* increase the module use count */
190 ip_vs_use_count_inc(); 191 ip_vs_use_count_inc();
191 192
192 mutex_lock(&ipvs->app_mutex); 193 mutex_lock(&__ip_vs_app_mutex);
193 194
194 list_add(&app->a_list, &ipvs->app_list); 195 list_add(&app->a_list, &ipvs->app_list);
195 196
196 mutex_unlock(&ipvs->app_mutex); 197 mutex_unlock(&__ip_vs_app_mutex);
197 198
198 return 0; 199 return 0;
199} 200}
@@ -205,10 +206,9 @@ int register_ip_vs_app(struct net *net, struct ip_vs_app *app)
205 */ 206 */
206void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app) 207void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app)
207{ 208{
208 struct netns_ipvs *ipvs = net_ipvs(net);
209 struct ip_vs_app *inc, *nxt; 209 struct ip_vs_app *inc, *nxt;
210 210
211 mutex_lock(&ipvs->app_mutex); 211 mutex_lock(&__ip_vs_app_mutex);
212 212
213 list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) { 213 list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) {
214 ip_vs_app_inc_release(net, inc); 214 ip_vs_app_inc_release(net, inc);
@@ -216,7 +216,7 @@ void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app)
216 216
217 list_del(&app->a_list); 217 list_del(&app->a_list);
218 218
219 mutex_unlock(&ipvs->app_mutex); 219 mutex_unlock(&__ip_vs_app_mutex);
220 220
221 /* decrease the module use count */ 221 /* decrease the module use count */
222 ip_vs_use_count_dec(); 222 ip_vs_use_count_dec();
@@ -501,7 +501,7 @@ static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos)
501 struct net *net = seq_file_net(seq); 501 struct net *net = seq_file_net(seq);
502 struct netns_ipvs *ipvs = net_ipvs(net); 502 struct netns_ipvs *ipvs = net_ipvs(net);
503 503
504 mutex_lock(&ipvs->app_mutex); 504 mutex_lock(&__ip_vs_app_mutex);
505 505
506 return *pos ? ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN; 506 return *pos ? ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN;
507} 507}
@@ -535,9 +535,7 @@ static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
535 535
536static void ip_vs_app_seq_stop(struct seq_file *seq, void *v) 536static void ip_vs_app_seq_stop(struct seq_file *seq, void *v)
537{ 537{
538 struct netns_ipvs *ipvs = net_ipvs(seq_file_net(seq)); 538 mutex_unlock(&__ip_vs_app_mutex);
539
540 mutex_unlock(&ipvs->app_mutex);
541} 539}
542 540
543static int ip_vs_app_seq_show(struct seq_file *seq, void *v) 541static int ip_vs_app_seq_show(struct seq_file *seq, void *v)
@@ -583,7 +581,6 @@ static int __net_init __ip_vs_app_init(struct net *net)
583 struct netns_ipvs *ipvs = net_ipvs(net); 581 struct netns_ipvs *ipvs = net_ipvs(net);
584 582
585 INIT_LIST_HEAD(&ipvs->app_list); 583 INIT_LIST_HEAD(&ipvs->app_list);
586 __mutex_init(&ipvs->app_mutex, "ipvs->app_mutex", &ipvs->app_key);
587 proc_net_fops_create(net, "ip_vs_app", 0, &ip_vs_app_fops); 584 proc_net_fops_create(net, "ip_vs_app", 0, &ip_vs_app_fops);
588 return 0; 585 return 0;
589} 586}
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index b799cea31f95..ae47090bf45f 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -3120,7 +3120,7 @@ nla_put_failure:
3120static int ip_vs_genl_dump_daemons(struct sk_buff *skb, 3120static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3121 struct netlink_callback *cb) 3121 struct netlink_callback *cb)
3122{ 3122{
3123 struct net *net = skb_net(skb); 3123 struct net *net = skb_sknet(skb);
3124 struct netns_ipvs *ipvs = net_ipvs(net); 3124 struct netns_ipvs *ipvs = net_ipvs(net);
3125 3125
3126 mutex_lock(&__ip_vs_mutex); 3126 mutex_lock(&__ip_vs_mutex);
@@ -3605,7 +3605,7 @@ int __net_init __ip_vs_control_init(struct net *net)
3605 3605
3606 /* procfs stats */ 3606 /* procfs stats */
3607 ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); 3607 ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
3608 if (ipvs->tot_stats.cpustats) { 3608 if (!ipvs->tot_stats.cpustats) {
3609 pr_err("%s(): alloc_percpu.\n", __func__); 3609 pr_err("%s(): alloc_percpu.\n", __func__);
3610 return -ENOMEM; 3610 return -ENOMEM;
3611 } 3611 }
diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c
index 867882313e49..bcd5ed6b7130 100644
--- a/net/netfilter/nf_conntrack_h323_asn1.c
+++ b/net/netfilter/nf_conntrack_h323_asn1.c
@@ -631,7 +631,7 @@ static int decode_seqof(bitstr_t *bs, const struct field_t *f,
631 CHECK_BOUND(bs, 2); 631 CHECK_BOUND(bs, 2);
632 count = *bs->cur++; 632 count = *bs->cur++;
633 count <<= 8; 633 count <<= 8;
634 count = *bs->cur++; 634 count += *bs->cur++;
635 break; 635 break;
636 case SEMI: 636 case SEMI:
637 BYTE_ALIGN(bs); 637 BYTE_ALIGN(bs);
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 533a183e6661..18b2ce5c8ced 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -731,10 +731,10 @@ static int callforward_do_filter(const union nf_inet_addr *src,
731 731
732 memset(&fl2, 0, sizeof(fl2)); 732 memset(&fl2, 0, sizeof(fl2));
733 fl2.daddr = dst->ip; 733 fl2.daddr = dst->ip;
734 if (!afinfo->route((struct dst_entry **)&rt1, 734 if (!afinfo->route(&init_net, (struct dst_entry **)&rt1,
735 flowi4_to_flowi(&fl1))) { 735 flowi4_to_flowi(&fl1), false)) {
736 if (!afinfo->route((struct dst_entry **)&rt2, 736 if (!afinfo->route(&init_net, (struct dst_entry **)&rt2,
737 flowi4_to_flowi(&fl2))) { 737 flowi4_to_flowi(&fl2), false)) {
738 if (rt1->rt_gateway == rt2->rt_gateway && 738 if (rt1->rt_gateway == rt2->rt_gateway &&
739 rt1->dst.dev == rt2->dst.dev) 739 rt1->dst.dev == rt2->dst.dev)
740 ret = 1; 740 ret = 1;
@@ -755,10 +755,10 @@ static int callforward_do_filter(const union nf_inet_addr *src,
755 755
756 memset(&fl2, 0, sizeof(fl2)); 756 memset(&fl2, 0, sizeof(fl2));
757 ipv6_addr_copy(&fl2.daddr, &dst->in6); 757 ipv6_addr_copy(&fl2.daddr, &dst->in6);
758 if (!afinfo->route((struct dst_entry **)&rt1, 758 if (!afinfo->route(&init_net, (struct dst_entry **)&rt1,
759 flowi6_to_flowi(&fl1))) { 759 flowi6_to_flowi(&fl1), false)) {
760 if (!afinfo->route((struct dst_entry **)&rt2, 760 if (!afinfo->route(&init_net, (struct dst_entry **)&rt2,
761 flowi6_to_flowi(&fl2))) { 761 flowi6_to_flowi(&fl2), false)) {
762 if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway, 762 if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway,
763 sizeof(rt1->rt6i_gateway)) && 763 sizeof(rt1->rt6i_gateway)) &&
764 rt1->dst.dev == rt2->dst.dev) 764 rt1->dst.dev == rt2->dst.dev)
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 6e6b46cb1db9..9e63b43faeed 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -166,7 +166,7 @@ static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb,
166 rcu_read_lock(); 166 rcu_read_lock();
167 ai = nf_get_afinfo(family); 167 ai = nf_get_afinfo(family);
168 if (ai != NULL) 168 if (ai != NULL)
169 ai->route((struct dst_entry **)&rt, &fl); 169 ai->route(&init_net, (struct dst_entry **)&rt, &fl, false);
170 rcu_read_unlock(); 170 rcu_read_unlock();
171 171
172 if (rt != NULL) { 172 if (rt != NULL) {
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c
index 2220b85e9519..b77d383cec78 100644
--- a/net/netfilter/xt_addrtype.c
+++ b/net/netfilter/xt_addrtype.c
@@ -32,11 +32,32 @@ MODULE_ALIAS("ipt_addrtype");
32MODULE_ALIAS("ip6t_addrtype"); 32MODULE_ALIAS("ip6t_addrtype");
33 33
34#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) 34#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
35static u32 xt_addrtype_rt6_to_type(const struct rt6_info *rt) 35static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
36 const struct in6_addr *addr)
36{ 37{
38 const struct nf_afinfo *afinfo;
39 struct flowi6 flow;
40 struct rt6_info *rt;
37 u32 ret; 41 u32 ret;
42 int route_err;
38 43
39 if (!rt) 44 memset(&flow, 0, sizeof(flow));
45 ipv6_addr_copy(&flow.daddr, addr);
46 if (dev)
47 flow.flowi6_oif = dev->ifindex;
48
49 rcu_read_lock();
50
51 afinfo = nf_get_afinfo(NFPROTO_IPV6);
52 if (afinfo != NULL)
53 route_err = afinfo->route(net, (struct dst_entry **)&rt,
54 flowi6_to_flowi(&flow), !!dev);
55 else
56 route_err = 1;
57
58 rcu_read_unlock();
59
60 if (route_err)
40 return XT_ADDRTYPE_UNREACHABLE; 61 return XT_ADDRTYPE_UNREACHABLE;
41 62
42 if (rt->rt6i_flags & RTF_REJECT) 63 if (rt->rt6i_flags & RTF_REJECT)
@@ -48,6 +69,9 @@ static u32 xt_addrtype_rt6_to_type(const struct rt6_info *rt)
48 ret |= XT_ADDRTYPE_LOCAL; 69 ret |= XT_ADDRTYPE_LOCAL;
49 if (rt->rt6i_flags & RTF_ANYCAST) 70 if (rt->rt6i_flags & RTF_ANYCAST)
50 ret |= XT_ADDRTYPE_ANYCAST; 71 ret |= XT_ADDRTYPE_ANYCAST;
72
73
74 dst_release(&rt->dst);
51 return ret; 75 return ret;
52} 76}
53 77
@@ -65,18 +89,8 @@ static bool match_type6(struct net *net, const struct net_device *dev,
65 return false; 89 return false;
66 90
67 if ((XT_ADDRTYPE_LOCAL | XT_ADDRTYPE_ANYCAST | 91 if ((XT_ADDRTYPE_LOCAL | XT_ADDRTYPE_ANYCAST |
68 XT_ADDRTYPE_UNREACHABLE) & mask) { 92 XT_ADDRTYPE_UNREACHABLE) & mask)
69 struct rt6_info *rt; 93 return !!(mask & match_lookup_rt6(net, dev, addr));
70 u32 type;
71 int ifindex = dev ? dev->ifindex : 0;
72
73 rt = rt6_lookup(net, addr, NULL, ifindex, !!dev);
74
75 type = xt_addrtype_rt6_to_type(rt);
76
77 dst_release(&rt->dst);
78 return !!(mask & type);
79 }
80 return true; 94 return true;
81} 95}
82 96
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 2c0086a4751e..481a86fdc409 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -195,7 +195,7 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par,
195 return info->match_flags & XT_CONNTRACK_STATE; 195 return info->match_flags & XT_CONNTRACK_STATE;
196 if ((info->match_flags & XT_CONNTRACK_DIRECTION) && 196 if ((info->match_flags & XT_CONNTRACK_DIRECTION) &&
197 (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) ^ 197 (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) ^
198 !!(info->invert_flags & XT_CONNTRACK_DIRECTION)) 198 !(info->invert_flags & XT_CONNTRACK_DIRECTION))
199 return false; 199 return false;
200 200
201 if (info->match_flags & XT_CONNTRACK_ORIGSRC) 201 if (info->match_flags & XT_CONNTRACK_ORIGSRC)
diff --git a/net/rds/cong.c b/net/rds/cong.c
index 75ea686f27d5..6daaa49d133f 100644
--- a/net/rds/cong.c
+++ b/net/rds/cong.c
@@ -33,8 +33,7 @@
33#include <linux/slab.h> 33#include <linux/slab.h>
34#include <linux/types.h> 34#include <linux/types.h>
35#include <linux/rbtree.h> 35#include <linux/rbtree.h>
36 36#include <linux/bitops.h>
37#include <asm-generic/bitops/le.h>
38 37
39#include "rds.h" 38#include "rds.h"
40 39
@@ -285,7 +284,7 @@ void rds_cong_set_bit(struct rds_cong_map *map, __be16 port)
285 i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS; 284 i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS;
286 off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS; 285 off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS;
287 286
288 generic___set_le_bit(off, (void *)map->m_page_addrs[i]); 287 __set_bit_le(off, (void *)map->m_page_addrs[i]);
289} 288}
290 289
291void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port) 290void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port)
@@ -299,7 +298,7 @@ void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port)
299 i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS; 298 i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS;
300 off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS; 299 off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS;
301 300
302 generic___clear_le_bit(off, (void *)map->m_page_addrs[i]); 301 __clear_bit_le(off, (void *)map->m_page_addrs[i]);
303} 302}
304 303
305static int rds_cong_test_bit(struct rds_cong_map *map, __be16 port) 304static int rds_cong_test_bit(struct rds_cong_map *map, __be16 port)
@@ -310,7 +309,7 @@ static int rds_cong_test_bit(struct rds_cong_map *map, __be16 port)
310 i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS; 309 i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS;
311 off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS; 310 off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS;
312 311
313 return generic_test_le_bit(off, (void *)map->m_page_addrs[i]); 312 return test_bit_le(off, (void *)map->m_page_addrs[i]);
314} 313}
315 314
316void rds_cong_add_socket(struct rds_sock *rs) 315void rds_cong_add_socket(struct rds_sock *rs)
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 5ee0c62046a0..a80aef6e3d1f 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -978,7 +978,7 @@ int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct ros
978 struct sock *make; 978 struct sock *make;
979 struct rose_sock *make_rose; 979 struct rose_sock *make_rose;
980 struct rose_facilities_struct facilities; 980 struct rose_facilities_struct facilities;
981 int n, len; 981 int n;
982 982
983 skb->sk = NULL; /* Initially we don't know who it's for */ 983 skb->sk = NULL; /* Initially we don't know who it's for */
984 984
@@ -987,9 +987,9 @@ int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct ros
987 */ 987 */
988 memset(&facilities, 0x00, sizeof(struct rose_facilities_struct)); 988 memset(&facilities, 0x00, sizeof(struct rose_facilities_struct));
989 989
990 len = (((skb->data[3] >> 4) & 0x0F) + 1) >> 1; 990 if (!rose_parse_facilities(skb->data + ROSE_CALL_REQ_FACILITIES_OFF,
991 len += (((skb->data[3] >> 0) & 0x0F) + 1) >> 1; 991 skb->len - ROSE_CALL_REQ_FACILITIES_OFF,
992 if (!rose_parse_facilities(skb->data + len + 4, &facilities)) { 992 &facilities)) {
993 rose_transmit_clear_request(neigh, lci, ROSE_INVALID_FACILITY, 76); 993 rose_transmit_clear_request(neigh, lci, ROSE_INVALID_FACILITY, 76);
994 return 0; 994 return 0;
995 } 995 }
diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c
index ae4a9d99aec7..344456206b70 100644
--- a/net/rose/rose_loopback.c
+++ b/net/rose/rose_loopback.c
@@ -73,9 +73,20 @@ static void rose_loopback_timer(unsigned long param)
73 unsigned int lci_i, lci_o; 73 unsigned int lci_i, lci_o;
74 74
75 while ((skb = skb_dequeue(&loopback_queue)) != NULL) { 75 while ((skb = skb_dequeue(&loopback_queue)) != NULL) {
76 if (skb->len < ROSE_MIN_LEN) {
77 kfree_skb(skb);
78 continue;
79 }
76 lci_i = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF); 80 lci_i = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF);
77 frametype = skb->data[2]; 81 frametype = skb->data[2];
78 dest = (rose_address *)(skb->data + 4); 82 if (frametype == ROSE_CALL_REQUEST &&
83 (skb->len <= ROSE_CALL_REQ_FACILITIES_OFF ||
84 skb->data[ROSE_CALL_REQ_ADDR_LEN_OFF] !=
85 ROSE_CALL_REQ_ADDR_LEN_VAL)) {
86 kfree_skb(skb);
87 continue;
88 }
89 dest = (rose_address *)(skb->data + ROSE_CALL_REQ_DEST_ADDR_OFF);
79 lci_o = ROSE_DEFAULT_MAXVC + 1 - lci_i; 90 lci_o = ROSE_DEFAULT_MAXVC + 1 - lci_i;
80 91
81 skb_reset_transport_header(skb); 92 skb_reset_transport_header(skb);
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index 88a77e90e7e8..08dcd2f29cdc 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -861,7 +861,7 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25)
861 unsigned int lci, new_lci; 861 unsigned int lci, new_lci;
862 unsigned char cause, diagnostic; 862 unsigned char cause, diagnostic;
863 struct net_device *dev; 863 struct net_device *dev;
864 int len, res = 0; 864 int res = 0;
865 char buf[11]; 865 char buf[11];
866 866
867#if 0 867#if 0
@@ -869,10 +869,17 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25)
869 return res; 869 return res;
870#endif 870#endif
871 871
872 if (skb->len < ROSE_MIN_LEN)
873 return res;
872 frametype = skb->data[2]; 874 frametype = skb->data[2];
873 lci = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF); 875 lci = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF);
874 src_addr = (rose_address *)(skb->data + 9); 876 if (frametype == ROSE_CALL_REQUEST &&
875 dest_addr = (rose_address *)(skb->data + 4); 877 (skb->len <= ROSE_CALL_REQ_FACILITIES_OFF ||
878 skb->data[ROSE_CALL_REQ_ADDR_LEN_OFF] !=
879 ROSE_CALL_REQ_ADDR_LEN_VAL))
880 return res;
881 src_addr = (rose_address *)(skb->data + ROSE_CALL_REQ_SRC_ADDR_OFF);
882 dest_addr = (rose_address *)(skb->data + ROSE_CALL_REQ_DEST_ADDR_OFF);
876 883
877 spin_lock_bh(&rose_neigh_list_lock); 884 spin_lock_bh(&rose_neigh_list_lock);
878 spin_lock_bh(&rose_route_list_lock); 885 spin_lock_bh(&rose_route_list_lock);
@@ -1010,12 +1017,11 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25)
1010 goto out; 1017 goto out;
1011 } 1018 }
1012 1019
1013 len = (((skb->data[3] >> 4) & 0x0F) + 1) >> 1;
1014 len += (((skb->data[3] >> 0) & 0x0F) + 1) >> 1;
1015
1016 memset(&facilities, 0x00, sizeof(struct rose_facilities_struct)); 1020 memset(&facilities, 0x00, sizeof(struct rose_facilities_struct));
1017 1021
1018 if (!rose_parse_facilities(skb->data + len + 4, &facilities)) { 1022 if (!rose_parse_facilities(skb->data + ROSE_CALL_REQ_FACILITIES_OFF,
1023 skb->len - ROSE_CALL_REQ_FACILITIES_OFF,
1024 &facilities)) {
1019 rose_transmit_clear_request(rose_neigh, lci, ROSE_INVALID_FACILITY, 76); 1025 rose_transmit_clear_request(rose_neigh, lci, ROSE_INVALID_FACILITY, 76);
1020 goto out; 1026 goto out;
1021 } 1027 }
diff --git a/net/rose/rose_subr.c b/net/rose/rose_subr.c
index 1734abba26a2..f6c71caa94b9 100644
--- a/net/rose/rose_subr.c
+++ b/net/rose/rose_subr.c
@@ -142,7 +142,7 @@ void rose_write_internal(struct sock *sk, int frametype)
142 *dptr++ = ROSE_GFI | lci1; 142 *dptr++ = ROSE_GFI | lci1;
143 *dptr++ = lci2; 143 *dptr++ = lci2;
144 *dptr++ = frametype; 144 *dptr++ = frametype;
145 *dptr++ = 0xAA; 145 *dptr++ = ROSE_CALL_REQ_ADDR_LEN_VAL;
146 memcpy(dptr, &rose->dest_addr, ROSE_ADDR_LEN); 146 memcpy(dptr, &rose->dest_addr, ROSE_ADDR_LEN);
147 dptr += ROSE_ADDR_LEN; 147 dptr += ROSE_ADDR_LEN;
148 memcpy(dptr, &rose->source_addr, ROSE_ADDR_LEN); 148 memcpy(dptr, &rose->source_addr, ROSE_ADDR_LEN);
@@ -246,12 +246,16 @@ static int rose_parse_national(unsigned char *p, struct rose_facilities_struct *
246 do { 246 do {
247 switch (*p & 0xC0) { 247 switch (*p & 0xC0) {
248 case 0x00: 248 case 0x00:
249 if (len < 2)
250 return -1;
249 p += 2; 251 p += 2;
250 n += 2; 252 n += 2;
251 len -= 2; 253 len -= 2;
252 break; 254 break;
253 255
254 case 0x40: 256 case 0x40:
257 if (len < 3)
258 return -1;
255 if (*p == FAC_NATIONAL_RAND) 259 if (*p == FAC_NATIONAL_RAND)
256 facilities->rand = ((p[1] << 8) & 0xFF00) + ((p[2] << 0) & 0x00FF); 260 facilities->rand = ((p[1] << 8) & 0xFF00) + ((p[2] << 0) & 0x00FF);
257 p += 3; 261 p += 3;
@@ -260,40 +264,61 @@ static int rose_parse_national(unsigned char *p, struct rose_facilities_struct *
260 break; 264 break;
261 265
262 case 0x80: 266 case 0x80:
267 if (len < 4)
268 return -1;
263 p += 4; 269 p += 4;
264 n += 4; 270 n += 4;
265 len -= 4; 271 len -= 4;
266 break; 272 break;
267 273
268 case 0xC0: 274 case 0xC0:
275 if (len < 2)
276 return -1;
269 l = p[1]; 277 l = p[1];
278 if (len < 2 + l)
279 return -1;
270 if (*p == FAC_NATIONAL_DEST_DIGI) { 280 if (*p == FAC_NATIONAL_DEST_DIGI) {
271 if (!fac_national_digis_received) { 281 if (!fac_national_digis_received) {
282 if (l < AX25_ADDR_LEN)
283 return -1;
272 memcpy(&facilities->source_digis[0], p + 2, AX25_ADDR_LEN); 284 memcpy(&facilities->source_digis[0], p + 2, AX25_ADDR_LEN);
273 facilities->source_ndigis = 1; 285 facilities->source_ndigis = 1;
274 } 286 }
275 } 287 }
276 else if (*p == FAC_NATIONAL_SRC_DIGI) { 288 else if (*p == FAC_NATIONAL_SRC_DIGI) {
277 if (!fac_national_digis_received) { 289 if (!fac_national_digis_received) {
290 if (l < AX25_ADDR_LEN)
291 return -1;
278 memcpy(&facilities->dest_digis[0], p + 2, AX25_ADDR_LEN); 292 memcpy(&facilities->dest_digis[0], p + 2, AX25_ADDR_LEN);
279 facilities->dest_ndigis = 1; 293 facilities->dest_ndigis = 1;
280 } 294 }
281 } 295 }
282 else if (*p == FAC_NATIONAL_FAIL_CALL) { 296 else if (*p == FAC_NATIONAL_FAIL_CALL) {
297 if (l < AX25_ADDR_LEN)
298 return -1;
283 memcpy(&facilities->fail_call, p + 2, AX25_ADDR_LEN); 299 memcpy(&facilities->fail_call, p + 2, AX25_ADDR_LEN);
284 } 300 }
285 else if (*p == FAC_NATIONAL_FAIL_ADD) { 301 else if (*p == FAC_NATIONAL_FAIL_ADD) {
302 if (l < 1 + ROSE_ADDR_LEN)
303 return -1;
286 memcpy(&facilities->fail_addr, p + 3, ROSE_ADDR_LEN); 304 memcpy(&facilities->fail_addr, p + 3, ROSE_ADDR_LEN);
287 } 305 }
288 else if (*p == FAC_NATIONAL_DIGIS) { 306 else if (*p == FAC_NATIONAL_DIGIS) {
307 if (l % AX25_ADDR_LEN)
308 return -1;
289 fac_national_digis_received = 1; 309 fac_national_digis_received = 1;
290 facilities->source_ndigis = 0; 310 facilities->source_ndigis = 0;
291 facilities->dest_ndigis = 0; 311 facilities->dest_ndigis = 0;
292 for (pt = p + 2, lg = 0 ; lg < l ; pt += AX25_ADDR_LEN, lg += AX25_ADDR_LEN) { 312 for (pt = p + 2, lg = 0 ; lg < l ; pt += AX25_ADDR_LEN, lg += AX25_ADDR_LEN) {
293 if (pt[6] & AX25_HBIT) 313 if (pt[6] & AX25_HBIT) {
314 if (facilities->dest_ndigis >= ROSE_MAX_DIGIS)
315 return -1;
294 memcpy(&facilities->dest_digis[facilities->dest_ndigis++], pt, AX25_ADDR_LEN); 316 memcpy(&facilities->dest_digis[facilities->dest_ndigis++], pt, AX25_ADDR_LEN);
295 else 317 } else {
318 if (facilities->source_ndigis >= ROSE_MAX_DIGIS)
319 return -1;
296 memcpy(&facilities->source_digis[facilities->source_ndigis++], pt, AX25_ADDR_LEN); 320 memcpy(&facilities->source_digis[facilities->source_ndigis++], pt, AX25_ADDR_LEN);
321 }
297 } 322 }
298 } 323 }
299 p += l + 2; 324 p += l + 2;
@@ -314,25 +339,38 @@ static int rose_parse_ccitt(unsigned char *p, struct rose_facilities_struct *fac
314 do { 339 do {
315 switch (*p & 0xC0) { 340 switch (*p & 0xC0) {
316 case 0x00: 341 case 0x00:
342 if (len < 2)
343 return -1;
317 p += 2; 344 p += 2;
318 n += 2; 345 n += 2;
319 len -= 2; 346 len -= 2;
320 break; 347 break;
321 348
322 case 0x40: 349 case 0x40:
350 if (len < 3)
351 return -1;
323 p += 3; 352 p += 3;
324 n += 3; 353 n += 3;
325 len -= 3; 354 len -= 3;
326 break; 355 break;
327 356
328 case 0x80: 357 case 0x80:
358 if (len < 4)
359 return -1;
329 p += 4; 360 p += 4;
330 n += 4; 361 n += 4;
331 len -= 4; 362 len -= 4;
332 break; 363 break;
333 364
334 case 0xC0: 365 case 0xC0:
366 if (len < 2)
367 return -1;
335 l = p[1]; 368 l = p[1];
369
370 /* Prevent overflows*/
371 if (l < 10 || l > 20)
372 return -1;
373
336 if (*p == FAC_CCITT_DEST_NSAP) { 374 if (*p == FAC_CCITT_DEST_NSAP) {
337 memcpy(&facilities->source_addr, p + 7, ROSE_ADDR_LEN); 375 memcpy(&facilities->source_addr, p + 7, ROSE_ADDR_LEN);
338 memcpy(callsign, p + 12, l - 10); 376 memcpy(callsign, p + 12, l - 10);
@@ -355,45 +393,44 @@ static int rose_parse_ccitt(unsigned char *p, struct rose_facilities_struct *fac
355 return n; 393 return n;
356} 394}
357 395
358int rose_parse_facilities(unsigned char *p, 396int rose_parse_facilities(unsigned char *p, unsigned packet_len,
359 struct rose_facilities_struct *facilities) 397 struct rose_facilities_struct *facilities)
360{ 398{
361 int facilities_len, len; 399 int facilities_len, len;
362 400
363 facilities_len = *p++; 401 facilities_len = *p++;
364 402
365 if (facilities_len == 0) 403 if (facilities_len == 0 || (unsigned)facilities_len > packet_len)
366 return 0; 404 return 0;
367 405
368 while (facilities_len > 0) { 406 while (facilities_len >= 3 && *p == 0x00) {
369 if (*p == 0x00) { 407 facilities_len--;
370 facilities_len--; 408 p++;
371 p++; 409
372 410 switch (*p) {
373 switch (*p) { 411 case FAC_NATIONAL: /* National */
374 case FAC_NATIONAL: /* National */ 412 len = rose_parse_national(p + 1, facilities, facilities_len - 1);
375 len = rose_parse_national(p + 1, facilities, facilities_len - 1); 413 break;
376 facilities_len -= len + 1; 414
377 p += len + 1; 415 case FAC_CCITT: /* CCITT */
378 break; 416 len = rose_parse_ccitt(p + 1, facilities, facilities_len - 1);
379 417 break;
380 case FAC_CCITT: /* CCITT */ 418
381 len = rose_parse_ccitt(p + 1, facilities, facilities_len - 1); 419 default:
382 facilities_len -= len + 1; 420 printk(KERN_DEBUG "ROSE: rose_parse_facilities - unknown facilities family %02X\n", *p);
383 p += len + 1; 421 len = 1;
384 break; 422 break;
385 423 }
386 default: 424
387 printk(KERN_DEBUG "ROSE: rose_parse_facilities - unknown facilities family %02X\n", *p); 425 if (len < 0)
388 facilities_len--; 426 return 0;
389 p++; 427 if (WARN_ON(len >= facilities_len))
390 break; 428 return 0;
391 } 429 facilities_len -= len + 1;
392 } else 430 p += len + 1;
393 break; /* Error in facilities format */
394 } 431 }
395 432
396 return 1; 433 return facilities_len == 0;
397} 434}
398 435
399static int rose_create_facilities(unsigned char *buffer, struct rose_sock *rose) 436static int rose_create_facilities(unsigned char *buffer, struct rose_sock *rose)
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 152976ec0b74..d5bf91d04f63 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1205,7 +1205,7 @@ SCTP_STATIC __init int sctp_init(void)
1205 if ((sctp_assoc_hashsize > (64 * 1024)) && order > 0) 1205 if ((sctp_assoc_hashsize > (64 * 1024)) && order > 0)
1206 continue; 1206 continue;
1207 sctp_assoc_hashtable = (struct sctp_hashbucket *) 1207 sctp_assoc_hashtable = (struct sctp_hashbucket *)
1208 __get_free_pages(GFP_ATOMIC, order); 1208 __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, order);
1209 } while (!sctp_assoc_hashtable && --order > 0); 1209 } while (!sctp_assoc_hashtable && --order > 0);
1210 if (!sctp_assoc_hashtable) { 1210 if (!sctp_assoc_hashtable) {
1211 pr_err("Failed association hash alloc\n"); 1211 pr_err("Failed association hash alloc\n");
@@ -1238,7 +1238,7 @@ SCTP_STATIC __init int sctp_init(void)
1238 if ((sctp_port_hashsize > (64 * 1024)) && order > 0) 1238 if ((sctp_port_hashsize > (64 * 1024)) && order > 0)
1239 continue; 1239 continue;
1240 sctp_port_hashtable = (struct sctp_bind_hashbucket *) 1240 sctp_port_hashtable = (struct sctp_bind_hashbucket *)
1241 __get_free_pages(GFP_ATOMIC, order); 1241 __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, order);
1242 } while (!sctp_port_hashtable && --order > 0); 1242 } while (!sctp_port_hashtable && --order > 0);
1243 if (!sctp_port_hashtable) { 1243 if (!sctp_port_hashtable) {
1244 pr_err("Failed bind hash alloc\n"); 1244 pr_err("Failed bind hash alloc\n");
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index 8b4061049d76..e3c36a274412 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -160,6 +160,28 @@ gss_mech_get_by_name(const char *name)
160 160
161EXPORT_SYMBOL_GPL(gss_mech_get_by_name); 161EXPORT_SYMBOL_GPL(gss_mech_get_by_name);
162 162
163struct gss_api_mech *
164gss_mech_get_by_OID(struct xdr_netobj *obj)
165{
166 struct gss_api_mech *pos, *gm = NULL;
167
168 spin_lock(&registered_mechs_lock);
169 list_for_each_entry(pos, &registered_mechs, gm_list) {
170 if (obj->len == pos->gm_oid.len) {
171 if (0 == memcmp(obj->data, pos->gm_oid.data, obj->len)) {
172 if (try_module_get(pos->gm_owner))
173 gm = pos;
174 break;
175 }
176 }
177 }
178 spin_unlock(&registered_mechs_lock);
179 return gm;
180
181}
182
183EXPORT_SYMBOL_GPL(gss_mech_get_by_OID);
184
163static inline int 185static inline int
164mech_supports_pseudoflavor(struct gss_api_mech *gm, u32 pseudoflavor) 186mech_supports_pseudoflavor(struct gss_api_mech *gm, u32 pseudoflavor)
165{ 187{
@@ -193,6 +215,22 @@ gss_mech_get_by_pseudoflavor(u32 pseudoflavor)
193 215
194EXPORT_SYMBOL_GPL(gss_mech_get_by_pseudoflavor); 216EXPORT_SYMBOL_GPL(gss_mech_get_by_pseudoflavor);
195 217
218int gss_mech_list_pseudoflavors(rpc_authflavor_t *array_ptr)
219{
220 struct gss_api_mech *pos = NULL;
221 int i = 0;
222
223 spin_lock(&registered_mechs_lock);
224 list_for_each_entry(pos, &registered_mechs, gm_list) {
225 array_ptr[i] = pos->gm_pfs->pseudoflavor;
226 i++;
227 }
228 spin_unlock(&registered_mechs_lock);
229 return i;
230}
231
232EXPORT_SYMBOL_GPL(gss_mech_list_pseudoflavors);
233
196u32 234u32
197gss_svc_to_pseudoflavor(struct gss_api_mech *gm, u32 service) 235gss_svc_to_pseudoflavor(struct gss_api_mech *gm, u32 service)
198{ 236{
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index ffb687671da0..6b43ee7221d5 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -860,8 +860,10 @@ static void rpc_release_resources_task(struct rpc_task *task)
860{ 860{
861 if (task->tk_rqstp) 861 if (task->tk_rqstp)
862 xprt_release(task); 862 xprt_release(task);
863 if (task->tk_msg.rpc_cred) 863 if (task->tk_msg.rpc_cred) {
864 put_rpccred(task->tk_msg.rpc_cred); 864 put_rpccred(task->tk_msg.rpc_cred);
865 task->tk_msg.rpc_cred = NULL;
866 }
865 rpc_task_release_client(task); 867 rpc_task_release_client(task);
866} 868}
867 869
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 30916b06c12b..c8e10216c113 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -38,6 +38,14 @@ struct unix_domain {
38 38
39extern struct auth_ops svcauth_unix; 39extern struct auth_ops svcauth_unix;
40 40
41static void svcauth_unix_domain_release(struct auth_domain *dom)
42{
43 struct unix_domain *ud = container_of(dom, struct unix_domain, h);
44
45 kfree(dom->name);
46 kfree(ud);
47}
48
41struct auth_domain *unix_domain_find(char *name) 49struct auth_domain *unix_domain_find(char *name)
42{ 50{
43 struct auth_domain *rv; 51 struct auth_domain *rv;
@@ -47,7 +55,7 @@ struct auth_domain *unix_domain_find(char *name)
47 while(1) { 55 while(1) {
48 if (rv) { 56 if (rv) {
49 if (new && rv != &new->h) 57 if (new && rv != &new->h)
50 auth_domain_put(&new->h); 58 svcauth_unix_domain_release(&new->h);
51 59
52 if (rv->flavour != &svcauth_unix) { 60 if (rv->flavour != &svcauth_unix) {
53 auth_domain_put(rv); 61 auth_domain_put(rv);
@@ -74,14 +82,6 @@ struct auth_domain *unix_domain_find(char *name)
74} 82}
75EXPORT_SYMBOL_GPL(unix_domain_find); 83EXPORT_SYMBOL_GPL(unix_domain_find);
76 84
77static void svcauth_unix_domain_release(struct auth_domain *dom)
78{
79 struct unix_domain *ud = container_of(dom, struct unix_domain, h);
80
81 kfree(dom->name);
82 kfree(ud);
83}
84
85 85
86/************************************************** 86/**************************************************
87 * cache for IP address to unix_domain 87 * cache for IP address to unix_domain
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index be96d429b475..1e336a06d3e6 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -710,6 +710,8 @@ static void xs_reset_transport(struct sock_xprt *transport)
710 if (sk == NULL) 710 if (sk == NULL)
711 return; 711 return;
712 712
713 transport->srcport = 0;
714
713 write_lock_bh(&sk->sk_callback_lock); 715 write_lock_bh(&sk->sk_callback_lock);
714 transport->inet = NULL; 716 transport->inet = NULL;
715 transport->sock = NULL; 717 transport->sock = NULL;
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index ea427f418f64..fbf6f33ae4d0 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -124,6 +124,15 @@ void cfg80211_bss_age(struct cfg80211_registered_device *dev,
124} 124}
125 125
126/* must hold dev->bss_lock! */ 126/* must hold dev->bss_lock! */
127static void __cfg80211_unlink_bss(struct cfg80211_registered_device *dev,
128 struct cfg80211_internal_bss *bss)
129{
130 list_del_init(&bss->list);
131 rb_erase(&bss->rbn, &dev->bss_tree);
132 kref_put(&bss->ref, bss_release);
133}
134
135/* must hold dev->bss_lock! */
127void cfg80211_bss_expire(struct cfg80211_registered_device *dev) 136void cfg80211_bss_expire(struct cfg80211_registered_device *dev)
128{ 137{
129 struct cfg80211_internal_bss *bss, *tmp; 138 struct cfg80211_internal_bss *bss, *tmp;
@@ -134,9 +143,7 @@ void cfg80211_bss_expire(struct cfg80211_registered_device *dev)
134 continue; 143 continue;
135 if (!time_after(jiffies, bss->ts + IEEE80211_SCAN_RESULT_EXPIRE)) 144 if (!time_after(jiffies, bss->ts + IEEE80211_SCAN_RESULT_EXPIRE))
136 continue; 145 continue;
137 list_del(&bss->list); 146 __cfg80211_unlink_bss(dev, bss);
138 rb_erase(&bss->rbn, &dev->bss_tree);
139 kref_put(&bss->ref, bss_release);
140 expired = true; 147 expired = true;
141 } 148 }
142 149
@@ -585,16 +592,23 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy,
585 struct cfg80211_internal_bss *res; 592 struct cfg80211_internal_bss *res;
586 size_t ielen = len - offsetof(struct ieee80211_mgmt, 593 size_t ielen = len - offsetof(struct ieee80211_mgmt,
587 u.probe_resp.variable); 594 u.probe_resp.variable);
588 size_t privsz = wiphy->bss_priv_size; 595 size_t privsz;
596
597 if (WARN_ON(!mgmt))
598 return NULL;
599
600 if (WARN_ON(!wiphy))
601 return NULL;
589 602
590 if (WARN_ON(wiphy->signal_type == CFG80211_SIGNAL_TYPE_UNSPEC && 603 if (WARN_ON(wiphy->signal_type == CFG80211_SIGNAL_TYPE_UNSPEC &&
591 (signal < 0 || signal > 100))) 604 (signal < 0 || signal > 100)))
592 return NULL; 605 return NULL;
593 606
594 if (WARN_ON(!mgmt || !wiphy || 607 if (WARN_ON(len < offsetof(struct ieee80211_mgmt, u.probe_resp.variable)))
595 len < offsetof(struct ieee80211_mgmt, u.probe_resp.variable)))
596 return NULL; 608 return NULL;
597 609
610 privsz = wiphy->bss_priv_size;
611
598 res = kzalloc(sizeof(*res) + privsz + ielen, gfp); 612 res = kzalloc(sizeof(*res) + privsz + ielen, gfp);
599 if (!res) 613 if (!res)
600 return NULL; 614 return NULL;
@@ -662,11 +676,8 @@ void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *pub)
662 676
663 spin_lock_bh(&dev->bss_lock); 677 spin_lock_bh(&dev->bss_lock);
664 if (!list_empty(&bss->list)) { 678 if (!list_empty(&bss->list)) {
665 list_del_init(&bss->list); 679 __cfg80211_unlink_bss(dev, bss);
666 dev->bss_generation++; 680 dev->bss_generation++;
667 rb_erase(&bss->rbn, &dev->bss_tree);
668
669 kref_put(&bss->ref, bss_release);
670 } 681 }
671 spin_unlock_bh(&dev->bss_lock); 682 spin_unlock_bh(&dev->bss_lock);
672} 683}
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 872065ca7f8c..a026b0ef2443 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -173,7 +173,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
173 goto drop_unlock; 173 goto drop_unlock;
174 } 174 }
175 175
176 if (x->props.replay_window && x->repl->check(x, skb, seq)) { 176 if (x->repl->check(x, skb, seq)) {
177 XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR); 177 XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR);
178 goto drop_unlock; 178 goto drop_unlock;
179 } 179 }
@@ -190,6 +190,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
190 XFRM_SKB_CB(skb)->seq.input.low = seq; 190 XFRM_SKB_CB(skb)->seq.input.low = seq;
191 XFRM_SKB_CB(skb)->seq.input.hi = seq_hi; 191 XFRM_SKB_CB(skb)->seq.input.hi = seq_hi;
192 192
193 skb_dst_force(skb);
194
193 nexthdr = x->type->input(x, skb); 195 nexthdr = x->type->input(x, skb);
194 196
195 if (nexthdr == -EINPROGRESS) 197 if (nexthdr == -EINPROGRESS)
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 1aba03f449cc..47bacd8c0250 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -78,6 +78,8 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
78 78
79 spin_unlock_bh(&x->lock); 79 spin_unlock_bh(&x->lock);
80 80
81 skb_dst_force(skb);
82
81 err = x->type->output(x, skb); 83 err = x->type->output(x, skb);
82 if (err == -EINPROGRESS) 84 if (err == -EINPROGRESS)
83 goto out_exit; 85 goto out_exit;
@@ -94,7 +96,7 @@ resume:
94 err = -EHOSTUNREACH; 96 err = -EHOSTUNREACH;
95 goto error_nolock; 97 goto error_nolock;
96 } 98 }
97 skb_dst_set(skb, dst_clone(dst)); 99 skb_dst_set(skb, dst);
98 x = dst->xfrm; 100 x = dst->xfrm;
99 } while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL)); 101 } while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL));
100 102
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index 2f5be5b15740..f218385950ca 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -118,6 +118,9 @@ static int xfrm_replay_check(struct xfrm_state *x,
118 u32 diff; 118 u32 diff;
119 u32 seq = ntohl(net_seq); 119 u32 seq = ntohl(net_seq);
120 120
121 if (!x->props.replay_window)
122 return 0;
123
121 if (unlikely(seq == 0)) 124 if (unlikely(seq == 0))
122 goto err; 125 goto err;
123 126
@@ -193,9 +196,14 @@ static int xfrm_replay_check_bmp(struct xfrm_state *x,
193{ 196{
194 unsigned int bitnr, nr; 197 unsigned int bitnr, nr;
195 struct xfrm_replay_state_esn *replay_esn = x->replay_esn; 198 struct xfrm_replay_state_esn *replay_esn = x->replay_esn;
199 u32 pos;
196 u32 seq = ntohl(net_seq); 200 u32 seq = ntohl(net_seq);
197 u32 diff = replay_esn->seq - seq; 201 u32 diff = replay_esn->seq - seq;
198 u32 pos = (replay_esn->seq - 1) % replay_esn->replay_window; 202
203 if (!replay_esn->replay_window)
204 return 0;
205
206 pos = (replay_esn->seq - 1) % replay_esn->replay_window;
199 207
200 if (unlikely(seq == 0)) 208 if (unlikely(seq == 0))
201 goto err; 209 goto err;
@@ -373,12 +381,17 @@ static int xfrm_replay_check_esn(struct xfrm_state *x,
373 unsigned int bitnr, nr; 381 unsigned int bitnr, nr;
374 u32 diff; 382 u32 diff;
375 struct xfrm_replay_state_esn *replay_esn = x->replay_esn; 383 struct xfrm_replay_state_esn *replay_esn = x->replay_esn;
384 u32 pos;
376 u32 seq = ntohl(net_seq); 385 u32 seq = ntohl(net_seq);
377 u32 pos = (replay_esn->seq - 1) % replay_esn->replay_window;
378 u32 wsize = replay_esn->replay_window; 386 u32 wsize = replay_esn->replay_window;
379 u32 top = replay_esn->seq; 387 u32 top = replay_esn->seq;
380 u32 bottom = top - wsize + 1; 388 u32 bottom = top - wsize + 1;
381 389
390 if (!wsize)
391 return 0;
392
393 pos = (replay_esn->seq - 1) % replay_esn->replay_window;
394
382 if (unlikely(seq == 0 && replay_esn->seq_hi == 0 && 395 if (unlikely(seq == 0 && replay_esn->seq_hi == 0 &&
383 (replay_esn->seq < replay_esn->replay_window - 1))) 396 (replay_esn->seq < replay_esn->replay_window - 1)))
384 goto err; 397 goto err;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index d575f0534868..dd78536d40de 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1181,6 +1181,12 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
1181 goto error; 1181 goto error;
1182 } 1182 }
1183 1183
1184 if (orig->replay_esn) {
1185 err = xfrm_replay_clone(x, orig);
1186 if (err)
1187 goto error;
1188 }
1189
1184 memcpy(&x->mark, &orig->mark, sizeof(x->mark)); 1190 memcpy(&x->mark, &orig->mark, sizeof(x->mark));
1185 1191
1186 err = xfrm_init_state(x); 1192 err = xfrm_init_state(x);
@@ -1907,7 +1913,7 @@ int xfrm_state_mtu(struct xfrm_state *x, int mtu)
1907 return res; 1913 return res;
1908} 1914}
1909 1915
1910int xfrm_init_state(struct xfrm_state *x) 1916int __xfrm_init_state(struct xfrm_state *x, bool init_replay)
1911{ 1917{
1912 struct xfrm_state_afinfo *afinfo; 1918 struct xfrm_state_afinfo *afinfo;
1913 struct xfrm_mode *inner_mode; 1919 struct xfrm_mode *inner_mode;
@@ -1980,12 +1986,25 @@ int xfrm_init_state(struct xfrm_state *x)
1980 if (x->outer_mode == NULL) 1986 if (x->outer_mode == NULL)
1981 goto error; 1987 goto error;
1982 1988
1989 if (init_replay) {
1990 err = xfrm_init_replay(x);
1991 if (err)
1992 goto error;
1993 }
1994
1983 x->km.state = XFRM_STATE_VALID; 1995 x->km.state = XFRM_STATE_VALID;
1984 1996
1985error: 1997error:
1986 return err; 1998 return err;
1987} 1999}
1988 2000
2001EXPORT_SYMBOL(__xfrm_init_state);
2002
2003int xfrm_init_state(struct xfrm_state *x)
2004{
2005 return __xfrm_init_state(x, true);
2006}
2007
1989EXPORT_SYMBOL(xfrm_init_state); 2008EXPORT_SYMBOL(xfrm_init_state);
1990 2009
1991int __net_init xfrm_state_init(struct net *net) 2010int __net_init xfrm_state_init(struct net *net)
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 706385ae3e4b..3d15d3e1b2c4 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -127,6 +127,9 @@ static inline int verify_replay(struct xfrm_usersa_info *p,
127 if (!rt) 127 if (!rt)
128 return 0; 128 return 0;
129 129
130 if (p->id.proto != IPPROTO_ESP)
131 return -EINVAL;
132
130 if (p->replay_window != 0) 133 if (p->replay_window != 0)
131 return -EINVAL; 134 return -EINVAL;
132 135
@@ -360,6 +363,23 @@ static int attach_aead(struct xfrm_algo_aead **algpp, u8 *props,
360 return 0; 363 return 0;
361} 364}
362 365
366static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_esn,
367 struct nlattr *rp)
368{
369 struct xfrm_replay_state_esn *up;
370
371 if (!replay_esn || !rp)
372 return 0;
373
374 up = nla_data(rp);
375
376 if (xfrm_replay_state_esn_len(replay_esn) !=
377 xfrm_replay_state_esn_len(up))
378 return -EINVAL;
379
380 return 0;
381}
382
363static int xfrm_alloc_replay_state_esn(struct xfrm_replay_state_esn **replay_esn, 383static int xfrm_alloc_replay_state_esn(struct xfrm_replay_state_esn **replay_esn,
364 struct xfrm_replay_state_esn **preplay_esn, 384 struct xfrm_replay_state_esn **preplay_esn,
365 struct nlattr *rta) 385 struct nlattr *rta)
@@ -511,7 +531,7 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
511 531
512 xfrm_mark_get(attrs, &x->mark); 532 xfrm_mark_get(attrs, &x->mark);
513 533
514 err = xfrm_init_state(x); 534 err = __xfrm_init_state(x, false);
515 if (err) 535 if (err)
516 goto error; 536 goto error;
517 537
@@ -1766,6 +1786,10 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh,
1766 if (x->km.state != XFRM_STATE_VALID) 1786 if (x->km.state != XFRM_STATE_VALID)
1767 goto out; 1787 goto out;
1768 1788
1789 err = xfrm_replay_verify_len(x->replay_esn, rp);
1790 if (err)
1791 goto out;
1792
1769 spin_lock_bh(&x->lock); 1793 spin_lock_bh(&x->lock);
1770 xfrm_update_ae_params(x, attrs); 1794 xfrm_update_ae_params(x, attrs);
1771 spin_unlock_bh(&x->lock); 1795 spin_unlock_bh(&x->lock);