Diffstat (limited to 'net')
-rw-r--r--  net/8021q/vlan_dev.c | 2
-rw-r--r--  net/8021q/vlanproc.c | 11
-rw-r--r--  net/9p/trans_virtio.c | 48
-rw-r--r--  net/atm/proc.c | 2
-rw-r--r--  net/bluetooth/af_bluetooth.c | 27
-rw-r--r--  net/bluetooth/bnep/sock.c | 2
-rw-r--r--  net/bluetooth/cmtp/capi.c | 2
-rw-r--r--  net/bluetooth/cmtp/sock.c | 2
-rw-r--r--  net/bluetooth/hci_sock.c | 2
-rw-r--r--  net/bluetooth/hidp/sock.c | 2
-rw-r--r--  net/bluetooth/l2cap_sock.c | 2
-rw-r--r--  net/bluetooth/rfcomm/sock.c | 2
-rw-r--r--  net/bluetooth/sco.c | 2
-rw-r--r--  net/bridge/br_stp_timer.c | 2
-rw-r--r--  net/can/bcm.c | 2
-rw-r--r--  net/can/proc.c | 2
-rw-r--r--  net/ceph/Makefile | 2
-rw-r--r--  net/ceph/auth.c | 117
-rw-r--r--  net/ceph/auth_x.c | 24
-rw-r--r--  net/ceph/auth_x.h | 1
-rw-r--r--  net/ceph/ceph_common.c | 7
-rw-r--r--  net/ceph/debugfs.c | 4
-rw-r--r--  net/ceph/messenger.c | 1019
-rw-r--r--  net/ceph/mon_client.c | 7
-rw-r--r--  net/ceph/osd_client.c | 1087
-rw-r--r--  net/ceph/osdmap.c | 45
-rw-r--r--  net/ceph/snapshot.c | 78
-rw-r--r--  net/core/dev.c | 2
-rw-r--r--  net/core/ethtool.c | 2
-rw-r--r--  net/core/neighbour.c | 2
-rw-r--r--  net/core/net-sysfs.c | 12
-rw-r--r--  net/core/net_namespace.c | 7
-rw-r--r--  net/core/netpoll.c | 2
-rw-r--r--  net/core/pktgen.c | 12
-rw-r--r--  net/ipv4/af_inet.c | 1
-rw-r--r--  net/ipv4/fib_trie.c | 13
-rw-r--r--  net/ipv4/gre.c | 4
-rw-r--r--  net/ipv4/inet_fragment.c | 1
-rw-r--r--  net/ipv4/netfilter/ipt_CLUSTERIP.c | 10
-rw-r--r--  net/ipv4/tcp_ipv4.c | 2
-rw-r--r--  net/ipv4/tcp_metrics.c | 15
-rw-r--r--  net/ipv4/udp.c | 9
-rw-r--r--  net/ipv6/proc.c | 5
-rw-r--r--  net/netfilter/x_tables.c | 6
-rw-r--r--  net/netfilter/xt_hashlimit.c | 16
-rw-r--r--  net/netfilter/xt_recent.c | 9
-rw-r--r--  net/packet/af_packet.c | 53
-rw-r--r--  net/socket.c | 9
-rw-r--r--  net/sunrpc/auth_gss/Makefile | 3
-rw-r--r--  net/sunrpc/auth_gss/auth_gss.c | 2
-rw-r--r--  net/sunrpc/auth_gss/gss_krb5_mech.c | 7
-rw-r--r--  net/sunrpc/auth_gss/gss_mech_switch.c | 7
-rw-r--r--  net/sunrpc/auth_gss/gss_rpc_upcall.c | 358
-rw-r--r--  net/sunrpc/auth_gss/gss_rpc_upcall.h | 48
-rw-r--r--  net/sunrpc/auth_gss/gss_rpc_xdr.c | 838
-rw-r--r--  net/sunrpc/auth_gss/gss_rpc_xdr.h | 264
-rw-r--r--  net/sunrpc/auth_gss/svcauth_gss.c | 363
-rw-r--r--  net/sunrpc/cache.c | 28
-rw-r--r--  net/sunrpc/clnt.c | 3
-rw-r--r--  net/sunrpc/netns.h | 6
-rw-r--r--  net/sunrpc/stats.c | 2
-rw-r--r--  net/sunrpc/xprt.c | 2
-rw-r--r--  net/sunrpc/xprtsock.c | 3
-rw-r--r--  net/tipc/bcast.c | 40
-rw-r--r--  net/tipc/link.c | 11
-rw-r--r--  net/wireless/lib80211_crypt_ccmp.c | 29
-rw-r--r--  net/wireless/lib80211_crypt_tkip.c | 44
-rw-r--r--  net/wireless/lib80211_crypt_wep.c | 5
-rw-r--r--  net/x25/x25_proc.c | 47
-rw-r--r--  net/xfrm/xfrm_algo.c | 13
70 files changed, 3818 insertions, 1000 deletions
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 8af508536d36..3a8c8fd63c88 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -628,7 +628,7 @@ static netdev_features_t vlan_dev_fix_features(struct net_device *dev,
628 netdev_features_t features) 628 netdev_features_t features)
629{ 629{
630 struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; 630 struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
631 u32 old_features = features; 631 netdev_features_t old_features = features;
632 632
633 features &= real_dev->vlan_features; 633 features &= real_dev->vlan_features;
634 features |= NETIF_F_RXCSUM; 634 features |= NETIF_F_RXCSUM;
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index dc526ec965e4..1d0e89213a28 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -93,7 +93,7 @@ static const struct file_operations vlan_fops = {
93 93
94static int vlandev_seq_open(struct inode *inode, struct file *file) 94static int vlandev_seq_open(struct inode *inode, struct file *file)
95{ 95{
96 return single_open(file, vlandev_seq_show, PDE(inode)->data); 96 return single_open(file, vlandev_seq_show, PDE_DATA(inode));
97} 97}
98 98
99static const struct file_operations vlandev_fops = { 99static const struct file_operations vlandev_fops = {
@@ -184,14 +184,9 @@ int vlan_proc_add_dev(struct net_device *vlandev)
184 */ 184 */
185int vlan_proc_rem_dev(struct net_device *vlandev) 185int vlan_proc_rem_dev(struct net_device *vlandev)
186{ 186{
187 struct vlan_net *vn = net_generic(dev_net(vlandev), vlan_net_id);
188
189 /** NOTE: This will consume the memory pointed to by dent, it seems. */ 187 /** NOTE: This will consume the memory pointed to by dent, it seems. */
190 if (vlan_dev_priv(vlandev)->dent) { 188 proc_remove(vlan_dev_priv(vlandev)->dent);
191 remove_proc_entry(vlan_dev_priv(vlandev)->dent->name, 189 vlan_dev_priv(vlandev)->dent = NULL;
192 vn->proc_vlan_dir);
193 vlan_dev_priv(vlandev)->dent = NULL;
194 }
195 return 0; 190 return 0;
196} 191}
197 192
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index de2e950a0a7a..e1c26b101830 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -194,11 +194,14 @@ static int pack_sg_list(struct scatterlist *sg, int start,
194 if (s > count) 194 if (s > count)
195 s = count; 195 s = count;
196 BUG_ON(index > limit); 196 BUG_ON(index > limit);
197 /* Make sure we don't terminate early. */
198 sg_unmark_end(&sg[index]);
197 sg_set_buf(&sg[index++], data, s); 199 sg_set_buf(&sg[index++], data, s);
198 count -= s; 200 count -= s;
199 data += s; 201 data += s;
200 } 202 }
201 203 if (index-start)
204 sg_mark_end(&sg[index - 1]);
202 return index-start; 205 return index-start;
203} 206}
204 207
@@ -236,12 +239,17 @@ pack_sg_list_p(struct scatterlist *sg, int start, int limit,
236 s = rest_of_page(data); 239 s = rest_of_page(data);
237 if (s > count) 240 if (s > count)
238 s = count; 241 s = count;
242 /* Make sure we don't terminate early. */
243 sg_unmark_end(&sg[index]);
239 sg_set_page(&sg[index++], pdata[i++], s, data_off); 244 sg_set_page(&sg[index++], pdata[i++], s, data_off);
240 data_off = 0; 245 data_off = 0;
241 data += s; 246 data += s;
242 count -= s; 247 count -= s;
243 nr_pages--; 248 nr_pages--;
244 } 249 }
250
251 if (index-start)
252 sg_mark_end(&sg[index - 1]);
245 return index - start; 253 return index - start;
246} 254}
247 255
@@ -256,9 +264,10 @@ static int
256p9_virtio_request(struct p9_client *client, struct p9_req_t *req) 264p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
257{ 265{
258 int err; 266 int err;
259 int in, out; 267 int in, out, out_sgs, in_sgs;
260 unsigned long flags; 268 unsigned long flags;
261 struct virtio_chan *chan = client->trans; 269 struct virtio_chan *chan = client->trans;
270 struct scatterlist *sgs[2];
262 271
263 p9_debug(P9_DEBUG_TRANS, "9p debug: virtio request\n"); 272 p9_debug(P9_DEBUG_TRANS, "9p debug: virtio request\n");
264 273
@@ -266,14 +275,19 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
266req_retry: 275req_retry:
267 spin_lock_irqsave(&chan->lock, flags); 276 spin_lock_irqsave(&chan->lock, flags);
268 277
278 out_sgs = in_sgs = 0;
269 /* Handle out VirtIO ring buffers */ 279 /* Handle out VirtIO ring buffers */
270 out = pack_sg_list(chan->sg, 0, 280 out = pack_sg_list(chan->sg, 0,
271 VIRTQUEUE_NUM, req->tc->sdata, req->tc->size); 281 VIRTQUEUE_NUM, req->tc->sdata, req->tc->size);
282 if (out)
283 sgs[out_sgs++] = chan->sg;
272 284
273 in = pack_sg_list(chan->sg, out, 285 in = pack_sg_list(chan->sg, out,
274 VIRTQUEUE_NUM, req->rc->sdata, req->rc->capacity); 286 VIRTQUEUE_NUM, req->rc->sdata, req->rc->capacity);
287 if (in)
288 sgs[out_sgs + in_sgs++] = chan->sg + out;
275 289
276 err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc, 290 err = virtqueue_add_sgs(chan->vq, sgs, out_sgs, in_sgs, req->tc,
277 GFP_ATOMIC); 291 GFP_ATOMIC);
278 if (err < 0) { 292 if (err < 0) {
279 if (err == -ENOSPC) { 293 if (err == -ENOSPC) {
@@ -289,7 +303,7 @@ req_retry:
289 } else { 303 } else {
290 spin_unlock_irqrestore(&chan->lock, flags); 304 spin_unlock_irqrestore(&chan->lock, flags);
291 p9_debug(P9_DEBUG_TRANS, 305 p9_debug(P9_DEBUG_TRANS,
292 "virtio rpc add_buf returned failure\n"); 306 "virtio rpc add_sgs returned failure\n");
293 return -EIO; 307 return -EIO;
294 } 308 }
295 } 309 }
@@ -351,11 +365,12 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
351 char *uidata, char *uodata, int inlen, 365 char *uidata, char *uodata, int inlen,
352 int outlen, int in_hdr_len, int kern_buf) 366 int outlen, int in_hdr_len, int kern_buf)
353{ 367{
354 int in, out, err; 368 int in, out, err, out_sgs, in_sgs;
355 unsigned long flags; 369 unsigned long flags;
356 int in_nr_pages = 0, out_nr_pages = 0; 370 int in_nr_pages = 0, out_nr_pages = 0;
357 struct page **in_pages = NULL, **out_pages = NULL; 371 struct page **in_pages = NULL, **out_pages = NULL;
358 struct virtio_chan *chan = client->trans; 372 struct virtio_chan *chan = client->trans;
373 struct scatterlist *sgs[4];
359 374
360 p9_debug(P9_DEBUG_TRANS, "virtio request\n"); 375 p9_debug(P9_DEBUG_TRANS, "virtio request\n");
361 376
@@ -396,13 +411,22 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
396 req->status = REQ_STATUS_SENT; 411 req->status = REQ_STATUS_SENT;
397req_retry_pinned: 412req_retry_pinned:
398 spin_lock_irqsave(&chan->lock, flags); 413 spin_lock_irqsave(&chan->lock, flags);
414
415 out_sgs = in_sgs = 0;
416
399 /* out data */ 417 /* out data */
400 out = pack_sg_list(chan->sg, 0, 418 out = pack_sg_list(chan->sg, 0,
401 VIRTQUEUE_NUM, req->tc->sdata, req->tc->size); 419 VIRTQUEUE_NUM, req->tc->sdata, req->tc->size);
402 420
403 if (out_pages) 421 if (out)
422 sgs[out_sgs++] = chan->sg;
423
424 if (out_pages) {
425 sgs[out_sgs++] = chan->sg + out;
404 out += pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM, 426 out += pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM,
405 out_pages, out_nr_pages, uodata, outlen); 427 out_pages, out_nr_pages, uodata, outlen);
428 }
429
406 /* 430 /*
407 * Take care of in data 431 * Take care of in data
408 * For example TREAD have 11. 432 * For example TREAD have 11.
@@ -412,11 +436,17 @@ req_retry_pinned:
412 */ 436 */
413 in = pack_sg_list(chan->sg, out, 437 in = pack_sg_list(chan->sg, out,
414 VIRTQUEUE_NUM, req->rc->sdata, in_hdr_len); 438 VIRTQUEUE_NUM, req->rc->sdata, in_hdr_len);
415 if (in_pages) 439 if (in)
440 sgs[out_sgs + in_sgs++] = chan->sg + out;
441
442 if (in_pages) {
443 sgs[out_sgs + in_sgs++] = chan->sg + out + in;
416 in += pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM, 444 in += pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM,
417 in_pages, in_nr_pages, uidata, inlen); 445 in_pages, in_nr_pages, uidata, inlen);
446 }
418 447
419 err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc, 448 BUG_ON(out_sgs + in_sgs > ARRAY_SIZE(sgs));
449 err = virtqueue_add_sgs(chan->vq, sgs, out_sgs, in_sgs, req->tc,
420 GFP_ATOMIC); 450 GFP_ATOMIC);
421 if (err < 0) { 451 if (err < 0) {
422 if (err == -ENOSPC) { 452 if (err == -ENOSPC) {
@@ -432,7 +462,7 @@ req_retry_pinned:
432 } else { 462 } else {
433 spin_unlock_irqrestore(&chan->lock, flags); 463 spin_unlock_irqrestore(&chan->lock, flags);
434 p9_debug(P9_DEBUG_TRANS, 464 p9_debug(P9_DEBUG_TRANS,
435 "virtio rpc add_buf returned failure\n"); 465 "virtio rpc add_sgs returned failure\n");
436 err = -EIO; 466 err = -EIO;
437 goto err_out; 467 goto err_out;
438 } 468 }
diff --git a/net/atm/proc.c b/net/atm/proc.c
index 6ac35ff0d6b9..bbb6461a4b7f 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -385,7 +385,7 @@ static ssize_t proc_dev_atm_read(struct file *file, char __user *buf,
385 page = get_zeroed_page(GFP_KERNEL); 385 page = get_zeroed_page(GFP_KERNEL);
386 if (!page) 386 if (!page)
387 return -ENOMEM; 387 return -ENOMEM;
388 dev = PDE(file_inode(file))->data; 388 dev = PDE_DATA(file_inode(file));
389 if (!dev->ops->proc_read) 389 if (!dev->ops->proc_read)
390 length = -EINVAL; 390 length = -EINVAL;
391 else { 391 else {
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index e5338f787d68..9096137c889c 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -609,7 +609,7 @@ static int bt_seq_open(struct inode *inode, struct file *file)
609 struct bt_sock_list *sk_list; 609 struct bt_sock_list *sk_list;
610 struct bt_seq_state *s; 610 struct bt_seq_state *s;
611 611
612 sk_list = PDE(inode)->data; 612 sk_list = PDE_DATA(inode);
613 s = __seq_open_private(file, &bt_seq_ops, 613 s = __seq_open_private(file, &bt_seq_ops,
614 sizeof(struct bt_seq_state)); 614 sizeof(struct bt_seq_state));
615 if (!s) 615 if (!s)
@@ -619,26 +619,21 @@ static int bt_seq_open(struct inode *inode, struct file *file)
619 return 0; 619 return 0;
620} 620}
621 621
622int bt_procfs_init(struct module* module, struct net *net, const char *name, 622static const struct file_operations bt_fops = {
623 .open = bt_seq_open,
624 .read = seq_read,
625 .llseek = seq_lseek,
626 .release = seq_release_private
627};
628
629int bt_procfs_init(struct net *net, const char *name,
623 struct bt_sock_list* sk_list, 630 struct bt_sock_list* sk_list,
624 int (* seq_show)(struct seq_file *, void *)) 631 int (* seq_show)(struct seq_file *, void *))
625{ 632{
626 struct proc_dir_entry * pde;
627
628 sk_list->custom_seq_show = seq_show; 633 sk_list->custom_seq_show = seq_show;
629 634
630 sk_list->fops.owner = module; 635 if (!proc_create_data(name, 0, net->proc_net, &bt_fops, sk_list))
631 sk_list->fops.open = bt_seq_open;
632 sk_list->fops.read = seq_read;
633 sk_list->fops.llseek = seq_lseek;
634 sk_list->fops.release = seq_release_private;
635
636 pde = proc_create(name, 0, net->proc_net, &sk_list->fops);
637 if (!pde)
638 return -ENOMEM; 636 return -ENOMEM;
639
640 pde->data = sk_list;
641
642 return 0; 637 return 0;
643} 638}
644 639
@@ -647,7 +642,7 @@ void bt_procfs_cleanup(struct net *net, const char *name)
647 remove_proc_entry(name, net->proc_net); 642 remove_proc_entry(name, net->proc_net);
648} 643}
649#else 644#else
650int bt_procfs_init(struct module* module, struct net *net, const char *name, 645int bt_procfs_init(struct net *net, const char *name,
651 struct bt_sock_list* sk_list, 646 struct bt_sock_list* sk_list,
652 int (* seq_show)(struct seq_file *, void *)) 647 int (* seq_show)(struct seq_file *, void *))
653{ 648{
diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c
index 5b1c04e28821..5f051290daba 100644
--- a/net/bluetooth/bnep/sock.c
+++ b/net/bluetooth/bnep/sock.c
@@ -234,7 +234,7 @@ int __init bnep_sock_init(void)
234 goto error; 234 goto error;
235 } 235 }
236 236
237 err = bt_procfs_init(THIS_MODULE, &init_net, "bnep", &bnep_sk_list, NULL); 237 err = bt_procfs_init(&init_net, "bnep", &bnep_sk_list, NULL);
238 if (err < 0) { 238 if (err < 0) {
239 BT_ERR("Failed to create BNEP proc file"); 239 BT_ERR("Failed to create BNEP proc file");
240 bt_sock_unregister(BTPROTO_BNEP); 240 bt_sock_unregister(BTPROTO_BNEP);
diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c
index a4a9d4b6816c..cd75e4d64b90 100644
--- a/net/bluetooth/cmtp/capi.c
+++ b/net/bluetooth/cmtp/capi.c
@@ -539,7 +539,7 @@ static int cmtp_proc_show(struct seq_file *m, void *v)
539 539
540static int cmtp_proc_open(struct inode *inode, struct file *file) 540static int cmtp_proc_open(struct inode *inode, struct file *file)
541{ 541{
542 return single_open(file, cmtp_proc_show, PDE(inode)->data); 542 return single_open(file, cmtp_proc_show, PDE_DATA(inode));
543} 543}
544 544
545static const struct file_operations cmtp_proc_fops = { 545static const struct file_operations cmtp_proc_fops = {
diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c
index 58d9edebab4b..d82787d417bd 100644
--- a/net/bluetooth/cmtp/sock.c
+++ b/net/bluetooth/cmtp/sock.c
@@ -245,7 +245,7 @@ int cmtp_init_sockets(void)
245 goto error; 245 goto error;
246 } 246 }
247 247
248 err = bt_procfs_init(THIS_MODULE, &init_net, "cmtp", &cmtp_sk_list, NULL); 248 err = bt_procfs_init(&init_net, "cmtp", &cmtp_sk_list, NULL);
249 if (err < 0) { 249 if (err < 0) {
250 BT_ERR("Failed to create CMTP proc file"); 250 BT_ERR("Failed to create CMTP proc file");
251 bt_sock_unregister(BTPROTO_HIDP); 251 bt_sock_unregister(BTPROTO_HIDP);
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index aa4354fca77c..9bd7d959e384 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -1107,7 +1107,7 @@ int __init hci_sock_init(void)
1107 goto error; 1107 goto error;
1108 } 1108 }
1109 1109
1110 err = bt_procfs_init(THIS_MODULE, &init_net, "hci", &hci_sk_list, NULL); 1110 err = bt_procfs_init(&init_net, "hci", &hci_sk_list, NULL);
1111 if (err < 0) { 1111 if (err < 0) {
1112 BT_ERR("Failed to create HCI proc file"); 1112 BT_ERR("Failed to create HCI proc file");
1113 bt_sock_unregister(BTPROTO_HCI); 1113 bt_sock_unregister(BTPROTO_HCI);
diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c
index 2f4cbb0865ca..cb3fdde1968a 100644
--- a/net/bluetooth/hidp/sock.c
+++ b/net/bluetooth/hidp/sock.c
@@ -275,7 +275,7 @@ int __init hidp_init_sockets(void)
275 goto error; 275 goto error;
276 } 276 }
277 277
278 err = bt_procfs_init(THIS_MODULE, &init_net, "hidp", &hidp_sk_list, NULL); 278 err = bt_procfs_init(&init_net, "hidp", &hidp_sk_list, NULL);
279 if (err < 0) { 279 if (err < 0) {
280 BT_ERR("Failed to create HIDP proc file"); 280 BT_ERR("Failed to create HIDP proc file");
281 bt_sock_unregister(BTPROTO_HIDP); 281 bt_sock_unregister(BTPROTO_HIDP);
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 141e7b058b7e..36fed40c162c 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -1298,7 +1298,7 @@ int __init l2cap_init_sockets(void)
1298 goto error; 1298 goto error;
1299 } 1299 }
1300 1300
1301 err = bt_procfs_init(THIS_MODULE, &init_net, "l2cap", &l2cap_sk_list, 1301 err = bt_procfs_init(&init_net, "l2cap", &l2cap_sk_list,
1302 NULL); 1302 NULL);
1303 if (err < 0) { 1303 if (err < 0) {
1304 BT_ERR("Failed to create L2CAP proc file"); 1304 BT_ERR("Failed to create L2CAP proc file");
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index a8638b58c4bf..30b3721dc6d7 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -1037,7 +1037,7 @@ int __init rfcomm_init_sockets(void)
1037 goto error; 1037 goto error;
1038 } 1038 }
1039 1039
1040 err = bt_procfs_init(THIS_MODULE, &init_net, "rfcomm", &rfcomm_sk_list, NULL); 1040 err = bt_procfs_init(&init_net, "rfcomm", &rfcomm_sk_list, NULL);
1041 if (err < 0) { 1041 if (err < 0) {
1042 BT_ERR("Failed to create RFCOMM proc file"); 1042 BT_ERR("Failed to create RFCOMM proc file");
1043 bt_sock_unregister(BTPROTO_RFCOMM); 1043 bt_sock_unregister(BTPROTO_RFCOMM);
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 373d81e6e8f0..e7bd4eea575c 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -1119,7 +1119,7 @@ int __init sco_init(void)
1119 goto error; 1119 goto error;
1120 } 1120 }
1121 1121
1122 err = bt_procfs_init(THIS_MODULE, &init_net, "sco", &sco_sk_list, NULL); 1122 err = bt_procfs_init(&init_net, "sco", &sco_sk_list, NULL);
1123 if (err < 0) { 1123 if (err < 0) {
1124 BT_ERR("Failed to create SCO proc file"); 1124 BT_ERR("Failed to create SCO proc file");
1125 bt_sock_unregister(BTPROTO_SCO); 1125 bt_sock_unregister(BTPROTO_SCO);
diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c
index c3530a81a33b..950663d4d330 100644
--- a/net/bridge/br_stp_timer.c
+++ b/net/bridge/br_stp_timer.c
@@ -107,7 +107,7 @@ static void br_tcn_timer_expired(unsigned long arg)
107 107
108 br_debug(br, "tcn timer expired\n"); 108 br_debug(br, "tcn timer expired\n");
109 spin_lock(&br->lock); 109 spin_lock(&br->lock);
110 if (br->dev->flags & IFF_UP) { 110 if (!br_is_root_bridge(br) && (br->dev->flags & IFF_UP)) {
111 br_transmit_tcn(br); 111 br_transmit_tcn(br);
112 112
113 mod_timer(&br->tcn_timer,jiffies + br->bridge_hello_time); 113 mod_timer(&br->tcn_timer,jiffies + br->bridge_hello_time);
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 5dcb20076f39..8f113e6ff327 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -226,7 +226,7 @@ static int bcm_proc_show(struct seq_file *m, void *v)
226 226
227static int bcm_proc_open(struct inode *inode, struct file *file) 227static int bcm_proc_open(struct inode *inode, struct file *file)
228{ 228{
229 return single_open(file, bcm_proc_show, PDE(inode)->data); 229 return single_open(file, bcm_proc_show, PDE_DATA(inode));
230} 230}
231 231
232static const struct file_operations bcm_proc_fops = { 232static const struct file_operations bcm_proc_fops = {
diff --git a/net/can/proc.c b/net/can/proc.c
index 1ab8c888f102..b543470c8f8b 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -378,7 +378,7 @@ static int can_rcvlist_proc_show(struct seq_file *m, void *v)
378 378
379static int can_rcvlist_proc_open(struct inode *inode, struct file *file) 379static int can_rcvlist_proc_open(struct inode *inode, struct file *file)
380{ 380{
381 return single_open(file, can_rcvlist_proc_show, PDE(inode)->data); 381 return single_open(file, can_rcvlist_proc_show, PDE_DATA(inode));
382} 382}
383 383
384static const struct file_operations can_rcvlist_proc_fops = { 384static const struct file_operations can_rcvlist_proc_fops = {
diff --git a/net/ceph/Makefile b/net/ceph/Makefile
index e87ef435e11b..958d9856912c 100644
--- a/net/ceph/Makefile
+++ b/net/ceph/Makefile
@@ -11,5 +11,5 @@ libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
11 crypto.o armor.o \ 11 crypto.o armor.o \
12 auth_x.o \ 12 auth_x.o \
13 ceph_fs.o ceph_strings.o ceph_hash.o \ 13 ceph_fs.o ceph_strings.o ceph_hash.o \
14 pagevec.o 14 pagevec.o snapshot.o
15 15
diff --git a/net/ceph/auth.c b/net/ceph/auth.c
index b4bf4ac090f1..6b923bcaa2a4 100644
--- a/net/ceph/auth.c
+++ b/net/ceph/auth.c
@@ -47,6 +47,7 @@ struct ceph_auth_client *ceph_auth_init(const char *name, const struct ceph_cryp
47 if (!ac) 47 if (!ac)
48 goto out; 48 goto out;
49 49
50 mutex_init(&ac->mutex);
50 ac->negotiating = true; 51 ac->negotiating = true;
51 if (name) 52 if (name)
52 ac->name = name; 53 ac->name = name;
@@ -73,10 +74,12 @@ void ceph_auth_destroy(struct ceph_auth_client *ac)
73 */ 74 */
74void ceph_auth_reset(struct ceph_auth_client *ac) 75void ceph_auth_reset(struct ceph_auth_client *ac)
75{ 76{
77 mutex_lock(&ac->mutex);
76 dout("auth_reset %p\n", ac); 78 dout("auth_reset %p\n", ac);
77 if (ac->ops && !ac->negotiating) 79 if (ac->ops && !ac->negotiating)
78 ac->ops->reset(ac); 80 ac->ops->reset(ac);
79 ac->negotiating = true; 81 ac->negotiating = true;
82 mutex_unlock(&ac->mutex);
80} 83}
81 84
82int ceph_entity_name_encode(const char *name, void **p, void *end) 85int ceph_entity_name_encode(const char *name, void **p, void *end)
@@ -102,6 +105,7 @@ int ceph_auth_build_hello(struct ceph_auth_client *ac, void *buf, size_t len)
102 int i, num; 105 int i, num;
103 int ret; 106 int ret;
104 107
108 mutex_lock(&ac->mutex);
105 dout("auth_build_hello\n"); 109 dout("auth_build_hello\n");
106 monhdr->have_version = 0; 110 monhdr->have_version = 0;
107 monhdr->session_mon = cpu_to_le16(-1); 111 monhdr->session_mon = cpu_to_le16(-1);
@@ -122,15 +126,19 @@ int ceph_auth_build_hello(struct ceph_auth_client *ac, void *buf, size_t len)
122 126
123 ret = ceph_entity_name_encode(ac->name, &p, end); 127 ret = ceph_entity_name_encode(ac->name, &p, end);
124 if (ret < 0) 128 if (ret < 0)
125 return ret; 129 goto out;
126 ceph_decode_need(&p, end, sizeof(u64), bad); 130 ceph_decode_need(&p, end, sizeof(u64), bad);
127 ceph_encode_64(&p, ac->global_id); 131 ceph_encode_64(&p, ac->global_id);
128 132
129 ceph_encode_32(&lenp, p - lenp - sizeof(u32)); 133 ceph_encode_32(&lenp, p - lenp - sizeof(u32));
130 return p - buf; 134 ret = p - buf;
135out:
136 mutex_unlock(&ac->mutex);
137 return ret;
131 138
132bad: 139bad:
133 return -ERANGE; 140 ret = -ERANGE;
141 goto out;
134} 142}
135 143
136static int ceph_build_auth_request(struct ceph_auth_client *ac, 144static int ceph_build_auth_request(struct ceph_auth_client *ac,
@@ -151,11 +159,13 @@ static int ceph_build_auth_request(struct ceph_auth_client *ac,
151 if (ret < 0) { 159 if (ret < 0) {
152 pr_err("error %d building auth method %s request\n", ret, 160 pr_err("error %d building auth method %s request\n", ret,
153 ac->ops->name); 161 ac->ops->name);
154 return ret; 162 goto out;
155 } 163 }
156 dout(" built request %d bytes\n", ret); 164 dout(" built request %d bytes\n", ret);
157 ceph_encode_32(&p, ret); 165 ceph_encode_32(&p, ret);
158 return p + ret - msg_buf; 166 ret = p + ret - msg_buf;
167out:
168 return ret;
159} 169}
160 170
161/* 171/*
@@ -176,6 +186,7 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac,
176 int result_msg_len; 186 int result_msg_len;
177 int ret = -EINVAL; 187 int ret = -EINVAL;
178 188
189 mutex_lock(&ac->mutex);
179 dout("handle_auth_reply %p %p\n", p, end); 190 dout("handle_auth_reply %p %p\n", p, end);
180 ceph_decode_need(&p, end, sizeof(u32) * 3 + sizeof(u64), bad); 191 ceph_decode_need(&p, end, sizeof(u32) * 3 + sizeof(u64), bad);
181 protocol = ceph_decode_32(&p); 192 protocol = ceph_decode_32(&p);
@@ -227,33 +238,103 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac,
227 238
228 ret = ac->ops->handle_reply(ac, result, payload, payload_end); 239 ret = ac->ops->handle_reply(ac, result, payload, payload_end);
229 if (ret == -EAGAIN) { 240 if (ret == -EAGAIN) {
230 return ceph_build_auth_request(ac, reply_buf, reply_len); 241 ret = ceph_build_auth_request(ac, reply_buf, reply_len);
231 } else if (ret) { 242 } else if (ret) {
232 pr_err("auth method '%s' error %d\n", ac->ops->name, ret); 243 pr_err("auth method '%s' error %d\n", ac->ops->name, ret);
233 return ret;
234 } 244 }
235 return 0;
236 245
237bad:
238 pr_err("failed to decode auth msg\n");
239out: 246out:
247 mutex_unlock(&ac->mutex);
240 return ret; 248 return ret;
249
250bad:
251 pr_err("failed to decode auth msg\n");
252 ret = -EINVAL;
253 goto out;
241} 254}
242 255
243int ceph_build_auth(struct ceph_auth_client *ac, 256int ceph_build_auth(struct ceph_auth_client *ac,
244 void *msg_buf, size_t msg_len) 257 void *msg_buf, size_t msg_len)
245{ 258{
259 int ret = 0;
260
261 mutex_lock(&ac->mutex);
246 if (!ac->protocol) 262 if (!ac->protocol)
247 return ceph_auth_build_hello(ac, msg_buf, msg_len); 263 ret = ceph_auth_build_hello(ac, msg_buf, msg_len);
248 BUG_ON(!ac->ops); 264 else if (ac->ops->should_authenticate(ac))
249 if (ac->ops->should_authenticate(ac)) 265 ret = ceph_build_auth_request(ac, msg_buf, msg_len);
250 return ceph_build_auth_request(ac, msg_buf, msg_len); 266 mutex_unlock(&ac->mutex);
251 return 0; 267 return ret;
252} 268}
253 269
254int ceph_auth_is_authenticated(struct ceph_auth_client *ac) 270int ceph_auth_is_authenticated(struct ceph_auth_client *ac)
255{ 271{
256 if (!ac->ops) 272 int ret = 0;
257 return 0; 273
258 return ac->ops->is_authenticated(ac); 274 mutex_lock(&ac->mutex);
275 if (ac->ops)
276 ret = ac->ops->is_authenticated(ac);
277 mutex_unlock(&ac->mutex);
278 return ret;
279}
280EXPORT_SYMBOL(ceph_auth_is_authenticated);
281
282int ceph_auth_create_authorizer(struct ceph_auth_client *ac,
283 int peer_type,
284 struct ceph_auth_handshake *auth)
285{
286 int ret = 0;
287
288 mutex_lock(&ac->mutex);
289 if (ac->ops && ac->ops->create_authorizer)
290 ret = ac->ops->create_authorizer(ac, peer_type, auth);
291 mutex_unlock(&ac->mutex);
292 return ret;
293}
294EXPORT_SYMBOL(ceph_auth_create_authorizer);
295
296void ceph_auth_destroy_authorizer(struct ceph_auth_client *ac,
297 struct ceph_authorizer *a)
298{
299 mutex_lock(&ac->mutex);
300 if (ac->ops && ac->ops->destroy_authorizer)
301 ac->ops->destroy_authorizer(ac, a);
302 mutex_unlock(&ac->mutex);
303}
304EXPORT_SYMBOL(ceph_auth_destroy_authorizer);
305
306int ceph_auth_update_authorizer(struct ceph_auth_client *ac,
307 int peer_type,
308 struct ceph_auth_handshake *a)
309{
310 int ret = 0;
311
312 mutex_lock(&ac->mutex);
313 if (ac->ops && ac->ops->update_authorizer)
314 ret = ac->ops->update_authorizer(ac, peer_type, a);
315 mutex_unlock(&ac->mutex);
316 return ret;
317}
318EXPORT_SYMBOL(ceph_auth_update_authorizer);
319
320int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
321 struct ceph_authorizer *a, size_t len)
322{
323 int ret = 0;
324
325 mutex_lock(&ac->mutex);
326 if (ac->ops && ac->ops->verify_authorizer_reply)
327 ret = ac->ops->verify_authorizer_reply(ac, a, len);
328 mutex_unlock(&ac->mutex);
329 return ret;
330}
331EXPORT_SYMBOL(ceph_auth_verify_authorizer_reply);
332
333void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac, int peer_type)
334{
335 mutex_lock(&ac->mutex);
336 if (ac->ops && ac->ops->invalidate_authorizer)
337 ac->ops->invalidate_authorizer(ac, peer_type);
338 mutex_unlock(&ac->mutex);
259} 339}
340EXPORT_SYMBOL(ceph_auth_invalidate_authorizer);
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index a16bf14eb027..96238ba95f2b 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -298,6 +298,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
298 return -ENOMEM; 298 return -ENOMEM;
299 } 299 }
300 au->service = th->service; 300 au->service = th->service;
301 au->secret_id = th->secret_id;
301 302
302 msg_a = au->buf->vec.iov_base; 303 msg_a = au->buf->vec.iov_base;
303 msg_a->struct_v = 1; 304 msg_a->struct_v = 1;
@@ -555,6 +556,26 @@ static int ceph_x_create_authorizer(
555 return 0; 556 return 0;
556} 557}
557 558
559static int ceph_x_update_authorizer(
560 struct ceph_auth_client *ac, int peer_type,
561 struct ceph_auth_handshake *auth)
562{
563 struct ceph_x_authorizer *au;
564 struct ceph_x_ticket_handler *th;
565
566 th = get_ticket_handler(ac, peer_type);
567 if (IS_ERR(th))
568 return PTR_ERR(th);
569
570 au = (struct ceph_x_authorizer *)auth->authorizer;
571 if (au->secret_id < th->secret_id) {
572 dout("ceph_x_update_authorizer service %u secret %llu < %llu\n",
573 au->service, au->secret_id, th->secret_id);
574 return ceph_x_build_authorizer(ac, th, au);
575 }
576 return 0;
577}
578
558static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac, 579static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
559 struct ceph_authorizer *a, size_t len) 580 struct ceph_authorizer *a, size_t len)
560{ 581{
@@ -630,7 +651,7 @@ static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac,
630 651
631 th = get_ticket_handler(ac, peer_type); 652 th = get_ticket_handler(ac, peer_type);
632 if (!IS_ERR(th)) 653 if (!IS_ERR(th))
633 remove_ticket_handler(ac, th); 654 memset(&th->validity, 0, sizeof(th->validity));
634} 655}
635 656
636 657
@@ -641,6 +662,7 @@ static const struct ceph_auth_client_ops ceph_x_ops = {
641 .build_request = ceph_x_build_request, 662 .build_request = ceph_x_build_request,
642 .handle_reply = ceph_x_handle_reply, 663 .handle_reply = ceph_x_handle_reply,
643 .create_authorizer = ceph_x_create_authorizer, 664 .create_authorizer = ceph_x_create_authorizer,
665 .update_authorizer = ceph_x_update_authorizer,
644 .verify_authorizer_reply = ceph_x_verify_authorizer_reply, 666 .verify_authorizer_reply = ceph_x_verify_authorizer_reply,
645 .destroy_authorizer = ceph_x_destroy_authorizer, 667 .destroy_authorizer = ceph_x_destroy_authorizer,
646 .invalidate_authorizer = ceph_x_invalidate_authorizer, 668 .invalidate_authorizer = ceph_x_invalidate_authorizer,
diff --git a/net/ceph/auth_x.h b/net/ceph/auth_x.h
index f459e93b774f..c5a058da7ac8 100644
--- a/net/ceph/auth_x.h
+++ b/net/ceph/auth_x.h
@@ -29,6 +29,7 @@ struct ceph_x_authorizer {
29 struct ceph_buffer *buf; 29 struct ceph_buffer *buf;
30 unsigned int service; 30 unsigned int service;
31 u64 nonce; 31 u64 nonce;
32 u64 secret_id;
32 char reply_buf[128]; /* big enough for encrypted blob */ 33 char reply_buf[128]; /* big enough for encrypted blob */
33}; 34};
34 35
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index e65e6e4be38b..34b11ee8124e 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -606,11 +606,17 @@ static int __init init_ceph_lib(void)
606 if (ret < 0) 606 if (ret < 0)
607 goto out_crypto; 607 goto out_crypto;
608 608
609 ret = ceph_osdc_setup();
610 if (ret < 0)
611 goto out_msgr;
612
609 pr_info("loaded (mon/osd proto %d/%d)\n", 613 pr_info("loaded (mon/osd proto %d/%d)\n",
610 CEPH_MONC_PROTOCOL, CEPH_OSDC_PROTOCOL); 614 CEPH_MONC_PROTOCOL, CEPH_OSDC_PROTOCOL);
611 615
612 return 0; 616 return 0;
613 617
618out_msgr:
619 ceph_msgr_exit();
614out_crypto: 620out_crypto:
615 ceph_crypto_shutdown(); 621 ceph_crypto_shutdown();
616out_debugfs: 622out_debugfs:
@@ -622,6 +628,7 @@ out:
622static void __exit exit_ceph_lib(void) 628static void __exit exit_ceph_lib(void)
623{ 629{
624 dout("exit_ceph_lib\n"); 630 dout("exit_ceph_lib\n");
631 ceph_osdc_cleanup();
625 ceph_msgr_exit(); 632 ceph_msgr_exit();
626 ceph_crypto_shutdown(); 633 ceph_crypto_shutdown();
627 ceph_debugfs_cleanup(); 634 ceph_debugfs_cleanup();
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 00d051f4894e..83661cdc0766 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -123,8 +123,8 @@ static int osdc_show(struct seq_file *s, void *pp)
123 mutex_lock(&osdc->request_mutex); 123 mutex_lock(&osdc->request_mutex);
124 for (p = rb_first(&osdc->requests); p; p = rb_next(p)) { 124 for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
125 struct ceph_osd_request *req; 125 struct ceph_osd_request *req;
126 unsigned int i;
126 int opcode; 127 int opcode;
127 int i;
128 128
129 req = rb_entry(p, struct ceph_osd_request, r_node); 129 req = rb_entry(p, struct ceph_osd_request, r_node);
130 130
@@ -142,7 +142,7 @@ static int osdc_show(struct seq_file *s, void *pp)
142 seq_printf(s, "\t"); 142 seq_printf(s, "\t");
143 143
144 for (i = 0; i < req->r_num_ops; i++) { 144 for (i = 0; i < req->r_num_ops; i++) {
145 opcode = le16_to_cpu(req->r_request_ops[i].op); 145 opcode = req->r_ops[i].op;
146 seq_printf(s, "\t%s", ceph_osd_op_name(opcode)); 146 seq_printf(s, "\t%s", ceph_osd_op_name(opcode));
147 } 147 }
148 148
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 2c0669fb54e3..eb0a46a49bd4 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -21,6 +21,9 @@
21#include <linux/ceph/pagelist.h> 21#include <linux/ceph/pagelist.h>
22#include <linux/export.h> 22#include <linux/export.h>
23 23
24#define list_entry_next(pos, member) \
25 list_entry(pos->member.next, typeof(*pos), member)
26
24/* 27/*
25 * Ceph uses the messenger to exchange ceph_msg messages with other 28 * Ceph uses the messenger to exchange ceph_msg messages with other
26 * hosts in the system. The messenger provides ordered and reliable 29 * hosts in the system. The messenger provides ordered and reliable
@@ -149,6 +152,11 @@ static bool con_flag_test_and_set(struct ceph_connection *con,
149 return test_and_set_bit(con_flag, &con->flags); 152 return test_and_set_bit(con_flag, &con->flags);
150} 153}
151 154
155/* Slab caches for frequently-allocated structures */
156
157static struct kmem_cache *ceph_msg_cache;
158static struct kmem_cache *ceph_msg_data_cache;
159
152/* static tag bytes (protocol control messages) */ 160/* static tag bytes (protocol control messages) */
153static char tag_msg = CEPH_MSGR_TAG_MSG; 161static char tag_msg = CEPH_MSGR_TAG_MSG;
154static char tag_ack = CEPH_MSGR_TAG_ACK; 162static char tag_ack = CEPH_MSGR_TAG_ACK;
@@ -223,6 +231,41 @@ static void encode_my_addr(struct ceph_messenger *msgr)
223 */ 231 */
224static struct workqueue_struct *ceph_msgr_wq; 232static struct workqueue_struct *ceph_msgr_wq;
225 233
234static int ceph_msgr_slab_init(void)
235{
236 BUG_ON(ceph_msg_cache);
237 ceph_msg_cache = kmem_cache_create("ceph_msg",
238 sizeof (struct ceph_msg),
239 __alignof__(struct ceph_msg), 0, NULL);
240
241 if (!ceph_msg_cache)
242 return -ENOMEM;
243
244 BUG_ON(ceph_msg_data_cache);
245 ceph_msg_data_cache = kmem_cache_create("ceph_msg_data",
246 sizeof (struct ceph_msg_data),
247 __alignof__(struct ceph_msg_data),
248 0, NULL);
249 if (ceph_msg_data_cache)
250 return 0;
251
252 kmem_cache_destroy(ceph_msg_cache);
253 ceph_msg_cache = NULL;
254
255 return -ENOMEM;
256}
257
258static void ceph_msgr_slab_exit(void)
259{
260 BUG_ON(!ceph_msg_data_cache);
261 kmem_cache_destroy(ceph_msg_data_cache);
262 ceph_msg_data_cache = NULL;
263
264 BUG_ON(!ceph_msg_cache);
265 kmem_cache_destroy(ceph_msg_cache);
266 ceph_msg_cache = NULL;
267}
268
226static void _ceph_msgr_exit(void) 269static void _ceph_msgr_exit(void)
227{ 270{
228 if (ceph_msgr_wq) { 271 if (ceph_msgr_wq) {
@@ -230,6 +273,8 @@ static void _ceph_msgr_exit(void)
230 ceph_msgr_wq = NULL; 273 ceph_msgr_wq = NULL;
231 } 274 }
232 275
276 ceph_msgr_slab_exit();
277
233 BUG_ON(zero_page == NULL); 278 BUG_ON(zero_page == NULL);
234 kunmap(zero_page); 279 kunmap(zero_page);
235 page_cache_release(zero_page); 280 page_cache_release(zero_page);
@@ -242,6 +287,9 @@ int ceph_msgr_init(void)
242 zero_page = ZERO_PAGE(0); 287 zero_page = ZERO_PAGE(0);
243 page_cache_get(zero_page); 288 page_cache_get(zero_page);
244 289
290 if (ceph_msgr_slab_init())
291 return -ENOMEM;
292
245 ceph_msgr_wq = alloc_workqueue("ceph-msgr", WQ_NON_REENTRANT, 0); 293 ceph_msgr_wq = alloc_workqueue("ceph-msgr", WQ_NON_REENTRANT, 0);
246 if (ceph_msgr_wq) 294 if (ceph_msgr_wq)
247 return 0; 295 return 0;
@@ -471,6 +519,22 @@ static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len)
471 return r; 519 return r;
472} 520}
473 521
522static int ceph_tcp_recvpage(struct socket *sock, struct page *page,
523 int page_offset, size_t length)
524{
525 void *kaddr;
526 int ret;
527
528 BUG_ON(page_offset + length > PAGE_SIZE);
529
530 kaddr = kmap(page);
531 BUG_ON(!kaddr);
532 ret = ceph_tcp_recvmsg(sock, kaddr + page_offset, length);
533 kunmap(page);
534
535 return ret;
536}
537
474/* 538/*
475 * write something. @more is true if caller will be sending more data 539 * write something. @more is true if caller will be sending more data
476 * shortly. 540 * shortly.
@@ -493,7 +557,7 @@ static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov,
493} 557}
494 558
495static int ceph_tcp_sendpage(struct socket *sock, struct page *page, 559static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
496 int offset, size_t size, int more) 560 int offset, size_t size, bool more)
497{ 561{
498 int flags = MSG_DONTWAIT | MSG_NOSIGNAL | (more ? MSG_MORE : MSG_EOR); 562 int flags = MSG_DONTWAIT | MSG_NOSIGNAL | (more ? MSG_MORE : MSG_EOR);
499 int ret; 563 int ret;
@@ -697,50 +761,397 @@ static void con_out_kvec_add(struct ceph_connection *con,
697} 761}
698 762
699#ifdef CONFIG_BLOCK 763#ifdef CONFIG_BLOCK
700static void init_bio_iter(struct bio *bio, struct bio **iter, int *seg) 764
765/*
766 * For a bio data item, a piece is whatever remains of the next
767 * entry in the current bio iovec, or the first entry in the next
768 * bio in the list.
769 */
770static void ceph_msg_data_bio_cursor_init(struct ceph_msg_data_cursor *cursor,
771 size_t length)
701{ 772{
702 if (!bio) { 773 struct ceph_msg_data *data = cursor->data;
703 *iter = NULL; 774 struct bio *bio;
704 *seg = 0; 775
705 return; 776 BUG_ON(data->type != CEPH_MSG_DATA_BIO);
777
778 bio = data->bio;
779 BUG_ON(!bio);
780 BUG_ON(!bio->bi_vcnt);
781
782 cursor->resid = min(length, data->bio_length);
783 cursor->bio = bio;
784 cursor->vector_index = 0;
785 cursor->vector_offset = 0;
786 cursor->last_piece = length <= bio->bi_io_vec[0].bv_len;
787}
788
789static struct page *ceph_msg_data_bio_next(struct ceph_msg_data_cursor *cursor,
790 size_t *page_offset,
791 size_t *length)
792{
793 struct ceph_msg_data *data = cursor->data;
794 struct bio *bio;
795 struct bio_vec *bio_vec;
796 unsigned int index;
797
798 BUG_ON(data->type != CEPH_MSG_DATA_BIO);
799
800 bio = cursor->bio;
801 BUG_ON(!bio);
802
803 index = cursor->vector_index;
804 BUG_ON(index >= (unsigned int) bio->bi_vcnt);
805
806 bio_vec = &bio->bi_io_vec[index];
807 BUG_ON(cursor->vector_offset >= bio_vec->bv_len);
808 *page_offset = (size_t) (bio_vec->bv_offset + cursor->vector_offset);
809 BUG_ON(*page_offset >= PAGE_SIZE);
810 if (cursor->last_piece) /* pagelist offset is always 0 */
811 *length = cursor->resid;
812 else
813 *length = (size_t) (bio_vec->bv_len - cursor->vector_offset);
814 BUG_ON(*length > cursor->resid);
815 BUG_ON(*page_offset + *length > PAGE_SIZE);
816
817 return bio_vec->bv_page;
818}
819
820static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor,
821 size_t bytes)
822{
823 struct bio *bio;
824 struct bio_vec *bio_vec;
825 unsigned int index;
826
827 BUG_ON(cursor->data->type != CEPH_MSG_DATA_BIO);
828
829 bio = cursor->bio;
830 BUG_ON(!bio);
831
832 index = cursor->vector_index;
833 BUG_ON(index >= (unsigned int) bio->bi_vcnt);
834 bio_vec = &bio->bi_io_vec[index];
835
836 /* Advance the cursor offset */
837
838 BUG_ON(cursor->resid < bytes);
839 cursor->resid -= bytes;
840 cursor->vector_offset += bytes;
841 if (cursor->vector_offset < bio_vec->bv_len)
842 return false; /* more bytes to process in this segment */
843 BUG_ON(cursor->vector_offset != bio_vec->bv_len);
844
845 /* Move on to the next segment, and possibly the next bio */
846
847 if (++index == (unsigned int) bio->bi_vcnt) {
848 bio = bio->bi_next;
849 index = 0;
706 } 850 }
707 *iter = bio; 851 cursor->bio = bio;
708 *seg = bio->bi_idx; 852 cursor->vector_index = index;
853 cursor->vector_offset = 0;
854
855 if (!cursor->last_piece) {
856 BUG_ON(!cursor->resid);
857 BUG_ON(!bio);
858 /* A short read is OK, so use <= rather than == */
859 if (cursor->resid <= bio->bi_io_vec[index].bv_len)
860 cursor->last_piece = true;
861 }
862
863 return true;
709} 864}
865#endif /* CONFIG_BLOCK */
710 866
711static void iter_bio_next(struct bio **bio_iter, int *seg) 867/*
868 * For a page array, a piece comes from the first page in the array
869 * that has not already been fully consumed.
870 */
871static void ceph_msg_data_pages_cursor_init(struct ceph_msg_data_cursor *cursor,
872 size_t length)
712{ 873{
713 if (*bio_iter == NULL) 874 struct ceph_msg_data *data = cursor->data;
714 return; 875 int page_count;
876
877 BUG_ON(data->type != CEPH_MSG_DATA_PAGES);
715 878
716 BUG_ON(*seg >= (*bio_iter)->bi_vcnt); 879 BUG_ON(!data->pages);
880 BUG_ON(!data->length);
717 881
718 (*seg)++; 882 cursor->resid = min(length, data->length);
719 if (*seg == (*bio_iter)->bi_vcnt) 883 page_count = calc_pages_for(data->alignment, (u64)data->length);
720 init_bio_iter((*bio_iter)->bi_next, bio_iter, seg); 884 cursor->page_offset = data->alignment & ~PAGE_MASK;
885 cursor->page_index = 0;
886 BUG_ON(page_count > (int)USHRT_MAX);
887 cursor->page_count = (unsigned short)page_count;
888 BUG_ON(length > SIZE_MAX - cursor->page_offset);
889 cursor->last_piece = (size_t)cursor->page_offset + length <= PAGE_SIZE;
721} 890}
722#endif
723 891
724static void prepare_write_message_data(struct ceph_connection *con) 892static struct page *
893ceph_msg_data_pages_next(struct ceph_msg_data_cursor *cursor,
894 size_t *page_offset, size_t *length)
725{ 895{
726 struct ceph_msg *msg = con->out_msg; 896 struct ceph_msg_data *data = cursor->data;
727 897
728 BUG_ON(!msg); 898 BUG_ON(data->type != CEPH_MSG_DATA_PAGES);
729 BUG_ON(!msg->hdr.data_len); 899
900 BUG_ON(cursor->page_index >= cursor->page_count);
901 BUG_ON(cursor->page_offset >= PAGE_SIZE);
902
903 *page_offset = cursor->page_offset;
904 if (cursor->last_piece)
905 *length = cursor->resid;
906 else
907 *length = PAGE_SIZE - *page_offset;
908
909 return data->pages[cursor->page_index];
910}
911
912static bool ceph_msg_data_pages_advance(struct ceph_msg_data_cursor *cursor,
913 size_t bytes)
914{
915 BUG_ON(cursor->data->type != CEPH_MSG_DATA_PAGES);
916
917 BUG_ON(cursor->page_offset + bytes > PAGE_SIZE);
918
919 /* Advance the cursor page offset */
920
921 cursor->resid -= bytes;
922 cursor->page_offset = (cursor->page_offset + bytes) & ~PAGE_MASK;
923 if (!bytes || cursor->page_offset)
924 return false; /* more bytes to process in the current page */
925
926 /* Move on to the next page; offset is already at 0 */
927
928 BUG_ON(cursor->page_index >= cursor->page_count);
929 cursor->page_index++;
930 cursor->last_piece = cursor->resid <= PAGE_SIZE;
931
932 return true;
933}
934
935/*
936 * For a pagelist, a piece is whatever remains to be consumed in the
937 * first page in the list, or the front of the next page.
938 */
939static void
940ceph_msg_data_pagelist_cursor_init(struct ceph_msg_data_cursor *cursor,
941 size_t length)
942{
943 struct ceph_msg_data *data = cursor->data;
944 struct ceph_pagelist *pagelist;
945 struct page *page;
946
947 BUG_ON(data->type != CEPH_MSG_DATA_PAGELIST);
948
949 pagelist = data->pagelist;
950 BUG_ON(!pagelist);
951
952 if (!length)
953 return; /* pagelist can be assigned but empty */
954
955 BUG_ON(list_empty(&pagelist->head));
956 page = list_first_entry(&pagelist->head, struct page, lru);
957
958 cursor->resid = min(length, pagelist->length);
959 cursor->page = page;
960 cursor->offset = 0;
961 cursor->last_piece = cursor->resid <= PAGE_SIZE;
962}
963
964static struct page *
965ceph_msg_data_pagelist_next(struct ceph_msg_data_cursor *cursor,
966 size_t *page_offset, size_t *length)
967{
968 struct ceph_msg_data *data = cursor->data;
969 struct ceph_pagelist *pagelist;
970
971 BUG_ON(data->type != CEPH_MSG_DATA_PAGELIST);
730 972
731 /* initialize page iterator */ 973 pagelist = data->pagelist;
732 con->out_msg_pos.page = 0; 974 BUG_ON(!pagelist);
733 if (msg->pages) 975
734 con->out_msg_pos.page_pos = msg->page_alignment; 976 BUG_ON(!cursor->page);
977 BUG_ON(cursor->offset + cursor->resid != pagelist->length);
978
979 /* offset of first page in pagelist is always 0 */
980 *page_offset = cursor->offset & ~PAGE_MASK;
981 if (cursor->last_piece)
982 *length = cursor->resid;
735 else 983 else
736 con->out_msg_pos.page_pos = 0; 984 *length = PAGE_SIZE - *page_offset;
985
986 return cursor->page;
987}
988
989static bool ceph_msg_data_pagelist_advance(struct ceph_msg_data_cursor *cursor,
990 size_t bytes)
991{
992 struct ceph_msg_data *data = cursor->data;
993 struct ceph_pagelist *pagelist;
994
995 BUG_ON(data->type != CEPH_MSG_DATA_PAGELIST);
996
997 pagelist = data->pagelist;
998 BUG_ON(!pagelist);
999
1000 BUG_ON(cursor->offset + cursor->resid != pagelist->length);
1001 BUG_ON((cursor->offset & ~PAGE_MASK) + bytes > PAGE_SIZE);
1002
1003 /* Advance the cursor offset */
1004
1005 cursor->resid -= bytes;
1006 cursor->offset += bytes;
1007 /* offset of first page in pagelist is always 0 */
1008 if (!bytes || cursor->offset & ~PAGE_MASK)
1009 return false; /* more bytes to process in the current page */
1010
1011 /* Move on to the next page */
1012
1013 BUG_ON(list_is_last(&cursor->page->lru, &pagelist->head));
1014 cursor->page = list_entry_next(cursor->page, lru);
1015 cursor->last_piece = cursor->resid <= PAGE_SIZE;
1016
1017 return true;
1018}
1019
1020/*
1021 * Message data is handled (sent or received) in pieces, where each
1022 * piece resides on a single page. The network layer might not
1023 * consume an entire piece at once. A data item's cursor keeps
1024 * track of which piece is next to process and how much remains to
1025 * be processed in that piece. It also tracks whether the current
1026 * piece is the last one in the data item.
1027 */
1028static void __ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor)
1029{
1030 size_t length = cursor->total_resid;
1031
1032 switch (cursor->data->type) {
1033 case CEPH_MSG_DATA_PAGELIST:
1034 ceph_msg_data_pagelist_cursor_init(cursor, length);
1035 break;
1036 case CEPH_MSG_DATA_PAGES:
1037 ceph_msg_data_pages_cursor_init(cursor, length);
1038 break;
737#ifdef CONFIG_BLOCK 1039#ifdef CONFIG_BLOCK
738 if (msg->bio) 1040 case CEPH_MSG_DATA_BIO:
739 init_bio_iter(msg->bio, &msg->bio_iter, &msg->bio_seg); 1041 ceph_msg_data_bio_cursor_init(cursor, length);
740#endif 1042 break;
741 con->out_msg_pos.data_pos = 0; 1043#endif /* CONFIG_BLOCK */
742 con->out_msg_pos.did_page_crc = false; 1044 case CEPH_MSG_DATA_NONE:
743 con->out_more = 1; /* data + footer will follow */ 1045 default:
1046 /* BUG(); */
1047 break;
1048 }
1049 cursor->need_crc = true;
1050}
1051
1052static void ceph_msg_data_cursor_init(struct ceph_msg *msg, size_t length)
1053{
1054 struct ceph_msg_data_cursor *cursor = &msg->cursor;
1055 struct ceph_msg_data *data;
1056
1057 BUG_ON(!length);
1058 BUG_ON(length > msg->data_length);
1059 BUG_ON(list_empty(&msg->data));
1060
1061 cursor->data_head = &msg->data;
1062 cursor->total_resid = length;
1063 data = list_first_entry(&msg->data, struct ceph_msg_data, links);
1064 cursor->data = data;
1065
1066 __ceph_msg_data_cursor_init(cursor);
1067}
1068
1069/*
1070 * Return the page containing the next piece to process for a given
1071 * data item, and supply the page offset and length of that piece.
1072 * Indicate whether this is the last piece in this data item.
1073 */
1074static struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor,
1075 size_t *page_offset, size_t *length,
1076 bool *last_piece)
1077{
1078 struct page *page;
1079
1080 switch (cursor->data->type) {
1081 case CEPH_MSG_DATA_PAGELIST:
1082 page = ceph_msg_data_pagelist_next(cursor, page_offset, length);
1083 break;
1084 case CEPH_MSG_DATA_PAGES:
1085 page = ceph_msg_data_pages_next(cursor, page_offset, length);
1086 break;
1087#ifdef CONFIG_BLOCK
1088 case CEPH_MSG_DATA_BIO:
1089 page = ceph_msg_data_bio_next(cursor, page_offset, length);
1090 break;
1091#endif /* CONFIG_BLOCK */
1092 case CEPH_MSG_DATA_NONE:
1093 default:
1094 page = NULL;
1095 break;
1096 }
1097 BUG_ON(!page);
1098 BUG_ON(*page_offset + *length > PAGE_SIZE);
1099 BUG_ON(!*length);
1100 if (last_piece)
1101 *last_piece = cursor->last_piece;
1102
1103 return page;
1104}
1105
1106/*
1107 * Returns true if the result moves the cursor on to the next piece
1108 * of the data item.
1109 */
1110static bool ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor,
1111 size_t bytes)
1112{
1113 bool new_piece;
1114
1115 BUG_ON(bytes > cursor->resid);
1116 switch (cursor->data->type) {
1117 case CEPH_MSG_DATA_PAGELIST:
1118 new_piece = ceph_msg_data_pagelist_advance(cursor, bytes);
1119 break;
1120 case CEPH_MSG_DATA_PAGES:
1121 new_piece = ceph_msg_data_pages_advance(cursor, bytes);
1122 break;
1123#ifdef CONFIG_BLOCK
1124 case CEPH_MSG_DATA_BIO:
1125 new_piece = ceph_msg_data_bio_advance(cursor, bytes);
1126 break;
1127#endif /* CONFIG_BLOCK */
1128 case CEPH_MSG_DATA_NONE:
1129 default:
1130 BUG();
1131 break;
1132 }
1133 cursor->total_resid -= bytes;
1134
1135 if (!cursor->resid && cursor->total_resid) {
1136 WARN_ON(!cursor->last_piece);
1137 BUG_ON(list_is_last(&cursor->data->links, cursor->data_head));
1138 cursor->data = list_entry_next(cursor->data, links);
1139 __ceph_msg_data_cursor_init(cursor);
1140 new_piece = true;
1141 }
1142 cursor->need_crc = new_piece;
1143
1144 return new_piece;
1145}
1146
1147static void prepare_message_data(struct ceph_msg *msg, u32 data_len)
1148{
1149 BUG_ON(!msg);
1150 BUG_ON(!data_len);
1151
1152 /* Initialize data cursor */
1153
1154 ceph_msg_data_cursor_init(msg, (size_t)data_len);
744} 1155}
745 1156
746/* 1157/*
@@ -803,16 +1214,12 @@ static void prepare_write_message(struct ceph_connection *con)
803 m->hdr.seq = cpu_to_le64(++con->out_seq); 1214 m->hdr.seq = cpu_to_le64(++con->out_seq);
804 m->needs_out_seq = false; 1215 m->needs_out_seq = false;
805 } 1216 }
806#ifdef CONFIG_BLOCK 1217 WARN_ON(m->data_length != le32_to_cpu(m->hdr.data_len));
807 else
808 m->bio_iter = NULL;
809#endif
810 1218
811 dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n", 1219 dout("prepare_write_message %p seq %lld type %d len %d+%d+%zd\n",
812 m, con->out_seq, le16_to_cpu(m->hdr.type), 1220 m, con->out_seq, le16_to_cpu(m->hdr.type),
813 le32_to_cpu(m->hdr.front_len), le32_to_cpu(m->hdr.middle_len), 1221 le32_to_cpu(m->hdr.front_len), le32_to_cpu(m->hdr.middle_len),
814 le32_to_cpu(m->hdr.data_len), 1222 m->data_length);
815 m->nr_pages);
816 BUG_ON(le32_to_cpu(m->hdr.front_len) != m->front.iov_len); 1223 BUG_ON(le32_to_cpu(m->hdr.front_len) != m->front.iov_len);
817 1224
818 /* tag + hdr + front + middle */ 1225 /* tag + hdr + front + middle */
@@ -843,11 +1250,13 @@ static void prepare_write_message(struct ceph_connection *con)
843 1250
844 /* is there a data payload? */ 1251 /* is there a data payload? */
845 con->out_msg->footer.data_crc = 0; 1252 con->out_msg->footer.data_crc = 0;
846 if (m->hdr.data_len) 1253 if (m->data_length) {
847 prepare_write_message_data(con); 1254 prepare_message_data(con->out_msg, m->data_length);
848 else 1255 con->out_more = 1; /* data + footer will follow */
1256 } else {
849 /* no, queue up footer too and be done */ 1257 /* no, queue up footer too and be done */
850 prepare_write_message_footer(con); 1258 prepare_write_message_footer(con);
1259 }
851 1260
852 con_flag_set(con, CON_FLAG_WRITE_PENDING); 1261 con_flag_set(con, CON_FLAG_WRITE_PENDING);
853} 1262}
@@ -874,6 +1283,24 @@ static void prepare_write_ack(struct ceph_connection *con)
874} 1283}
875 1284
876/* 1285/*
1286 * Prepare to share the seq during handshake
1287 */
1288static void prepare_write_seq(struct ceph_connection *con)
1289{
1290 dout("prepare_write_seq %p %llu -> %llu\n", con,
1291 con->in_seq_acked, con->in_seq);
1292 con->in_seq_acked = con->in_seq;
1293
1294 con_out_kvec_reset(con);
1295
1296 con->out_temp_ack = cpu_to_le64(con->in_seq_acked);
1297 con_out_kvec_add(con, sizeof (con->out_temp_ack),
1298 &con->out_temp_ack);
1299
1300 con_flag_set(con, CON_FLAG_WRITE_PENDING);
1301}
1302
1303/*
877 * Prepare to write keepalive byte. 1304 * Prepare to write keepalive byte.
878 */ 1305 */
879static void prepare_write_keepalive(struct ceph_connection *con) 1306static void prepare_write_keepalive(struct ceph_connection *con)
@@ -1022,35 +1449,19 @@ out:
1022 return ret; /* done! */ 1449 return ret; /* done! */
1023} 1450}
1024 1451
1025static void out_msg_pos_next(struct ceph_connection *con, struct page *page, 1452static u32 ceph_crc32c_page(u32 crc, struct page *page,
1026 size_t len, size_t sent, bool in_trail) 1453 unsigned int page_offset,
1454 unsigned int length)
1027{ 1455{
1028 struct ceph_msg *msg = con->out_msg; 1456 char *kaddr;
1029 1457
1030 BUG_ON(!msg); 1458 kaddr = kmap(page);
1031 BUG_ON(!sent); 1459 BUG_ON(kaddr == NULL);
1032 1460 crc = crc32c(crc, kaddr + page_offset, length);
1033 con->out_msg_pos.data_pos += sent; 1461 kunmap(page);
1034 con->out_msg_pos.page_pos += sent;
1035 if (sent < len)
1036 return;
1037 1462
1038 BUG_ON(sent != len); 1463 return crc;
1039 con->out_msg_pos.page_pos = 0;
1040 con->out_msg_pos.page++;
1041 con->out_msg_pos.did_page_crc = false;
1042 if (in_trail)
1043 list_move_tail(&page->lru,
1044 &msg->trail->head);
1045 else if (msg->pagelist)
1046 list_move_tail(&page->lru,
1047 &msg->pagelist->head);
1048#ifdef CONFIG_BLOCK
1049 else if (msg->bio)
1050 iter_bio_next(&msg->bio_iter, &msg->bio_seg);
1051#endif
1052} 1464}
1053
1054/* 1465/*
1055 * Write as much message data payload as we can. If we finish, queue 1466 * Write as much message data payload as we can. If we finish, queue
1056 * up the footer. 1467 * up the footer.
@@ -1058,21 +1469,17 @@ static void out_msg_pos_next(struct ceph_connection *con, struct page *page,
1058 * 0 -> socket full, but more to do 1469 * 0 -> socket full, but more to do
1059 * <0 -> error 1470 * <0 -> error
1060 */ 1471 */
1061static int write_partial_msg_pages(struct ceph_connection *con) 1472static int write_partial_message_data(struct ceph_connection *con)
1062{ 1473{
1063 struct ceph_msg *msg = con->out_msg; 1474 struct ceph_msg *msg = con->out_msg;
1064 unsigned int data_len = le32_to_cpu(msg->hdr.data_len); 1475 struct ceph_msg_data_cursor *cursor = &msg->cursor;
1065 size_t len;
1066 bool do_datacrc = !con->msgr->nocrc; 1476 bool do_datacrc = !con->msgr->nocrc;
1067 int ret; 1477 u32 crc;
1068 int total_max_write;
1069 bool in_trail = false;
1070 const size_t trail_len = (msg->trail ? msg->trail->length : 0);
1071 const size_t trail_off = data_len - trail_len;
1072 1478
1073 dout("write_partial_msg_pages %p msg %p page %d/%d offset %d\n", 1479 dout("%s %p msg %p\n", __func__, con, msg);
1074 con, msg, con->out_msg_pos.page, msg->nr_pages, 1480
1075 con->out_msg_pos.page_pos); 1481 if (list_empty(&msg->data))
1482 return -EINVAL;
1076 1483
1077 /* 1484 /*
1078 * Iterate through each page that contains data to be 1485 * Iterate through each page that contains data to be
@@ -1082,72 +1489,41 @@ static int write_partial_msg_pages(struct ceph_connection *con)
1082 * need to map the page. If we have no pages, they have 1489 * need to map the page. If we have no pages, they have
1083 * been revoked, so use the zero page. 1490 * been revoked, so use the zero page.
1084 */ 1491 */
1085 while (data_len > con->out_msg_pos.data_pos) { 1492 crc = do_datacrc ? le32_to_cpu(msg->footer.data_crc) : 0;
1086 struct page *page = NULL; 1493 while (cursor->resid) {
1087 int max_write = PAGE_SIZE; 1494 struct page *page;
1088 int bio_offset = 0; 1495 size_t page_offset;
1089 1496 size_t length;
1090 in_trail = in_trail || con->out_msg_pos.data_pos >= trail_off; 1497 bool last_piece;
1091 if (!in_trail) 1498 bool need_crc;
1092 total_max_write = trail_off - con->out_msg_pos.data_pos; 1499 int ret;
1093
1094 if (in_trail) {
1095 total_max_write = data_len - con->out_msg_pos.data_pos;
1096
1097 page = list_first_entry(&msg->trail->head,
1098 struct page, lru);
1099 } else if (msg->pages) {
1100 page = msg->pages[con->out_msg_pos.page];
1101 } else if (msg->pagelist) {
1102 page = list_first_entry(&msg->pagelist->head,
1103 struct page, lru);
1104#ifdef CONFIG_BLOCK
1105 } else if (msg->bio) {
1106 struct bio_vec *bv;
1107 1500
1108 bv = bio_iovec_idx(msg->bio_iter, msg->bio_seg); 1501 page = ceph_msg_data_next(&msg->cursor, &page_offset, &length,
1109 page = bv->bv_page; 1502 &last_piece);
1110 bio_offset = bv->bv_offset; 1503 ret = ceph_tcp_sendpage(con->sock, page, page_offset,
1111 max_write = bv->bv_len; 1504 length, last_piece);
1112#endif 1505 if (ret <= 0) {
1113 } else { 1506 if (do_datacrc)
1114 page = zero_page; 1507 msg->footer.data_crc = cpu_to_le32(crc);
1115 }
1116 len = min_t(int, max_write - con->out_msg_pos.page_pos,
1117 total_max_write);
1118
1119 if (do_datacrc && !con->out_msg_pos.did_page_crc) {
1120 void *base;
1121 u32 crc = le32_to_cpu(msg->footer.data_crc);
1122 char *kaddr;
1123
1124 kaddr = kmap(page);
1125 BUG_ON(kaddr == NULL);
1126 base = kaddr + con->out_msg_pos.page_pos + bio_offset;
1127 crc = crc32c(crc, base, len);
1128 kunmap(page);
1129 msg->footer.data_crc = cpu_to_le32(crc);
1130 con->out_msg_pos.did_page_crc = true;
1131 }
1132 ret = ceph_tcp_sendpage(con->sock, page,
1133 con->out_msg_pos.page_pos + bio_offset,
1134 len, 1);
1135 if (ret <= 0)
1136 goto out;
1137 1508
1138 out_msg_pos_next(con, page, len, (size_t) ret, in_trail); 1509 return ret;
1510 }
1511 if (do_datacrc && cursor->need_crc)
1512 crc = ceph_crc32c_page(crc, page, page_offset, length);
1513 need_crc = ceph_msg_data_advance(&msg->cursor, (size_t)ret);
1139 } 1514 }
1140 1515
1141 dout("write_partial_msg_pages %p msg %p done\n", con, msg); 1516 dout("%s %p msg %p done\n", __func__, con, msg);
1142 1517
1143 /* prepare and queue up footer, too */ 1518 /* prepare and queue up footer, too */
1144 if (!do_datacrc) 1519 if (do_datacrc)
1520 msg->footer.data_crc = cpu_to_le32(crc);
1521 else
1145 msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC; 1522 msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC;
1146 con_out_kvec_reset(con); 1523 con_out_kvec_reset(con);
1147 prepare_write_message_footer(con); 1524 prepare_write_message_footer(con);
1148 ret = 1; 1525
1149out: 1526 return 1; /* must return > 0 to indicate success */
1150 return ret;
1151} 1527}
1152 1528
1153/* 1529/*
@@ -1160,7 +1536,7 @@ static int write_partial_skip(struct ceph_connection *con)
1160 while (con->out_skip > 0) { 1536 while (con->out_skip > 0) {
1161 size_t size = min(con->out_skip, (int) PAGE_CACHE_SIZE); 1537 size_t size = min(con->out_skip, (int) PAGE_CACHE_SIZE);
1162 1538
1163 ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, 1); 1539 ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, true);
1164 if (ret <= 0) 1540 if (ret <= 0)
1165 goto out; 1541 goto out;
1166 con->out_skip -= ret; 1542 con->out_skip -= ret;
@@ -1191,6 +1567,13 @@ static void prepare_read_ack(struct ceph_connection *con)
1191 con->in_base_pos = 0; 1567 con->in_base_pos = 0;
1192} 1568}
1193 1569
1570static void prepare_read_seq(struct ceph_connection *con)
1571{
1572 dout("prepare_read_seq %p\n", con);
1573 con->in_base_pos = 0;
1574 con->in_tag = CEPH_MSGR_TAG_SEQ;
1575}
1576
1194static void prepare_read_tag(struct ceph_connection *con) 1577static void prepare_read_tag(struct ceph_connection *con)
1195{ 1578{
1196 dout("prepare_read_tag %p\n", con); 1579 dout("prepare_read_tag %p\n", con);
@@ -1597,7 +1980,6 @@ static int process_connect(struct ceph_connection *con)
1597 con->error_msg = "connect authorization failure"; 1980 con->error_msg = "connect authorization failure";
1598 return -1; 1981 return -1;
1599 } 1982 }
1600 con->auth_retry = 1;
1601 con_out_kvec_reset(con); 1983 con_out_kvec_reset(con);
1602 ret = prepare_write_connect(con); 1984 ret = prepare_write_connect(con);
1603 if (ret < 0) 1985 if (ret < 0)
@@ -1668,6 +2050,7 @@ static int process_connect(struct ceph_connection *con)
1668 prepare_read_connect(con); 2050 prepare_read_connect(con);
1669 break; 2051 break;
1670 2052
2053 case CEPH_MSGR_TAG_SEQ:
1671 case CEPH_MSGR_TAG_READY: 2054 case CEPH_MSGR_TAG_READY:
1672 if (req_feat & ~server_feat) { 2055 if (req_feat & ~server_feat) {
1673 pr_err("%s%lld %s protocol feature mismatch," 2056 pr_err("%s%lld %s protocol feature mismatch,"
@@ -1682,7 +2065,7 @@ static int process_connect(struct ceph_connection *con)
1682 2065
1683 WARN_ON(con->state != CON_STATE_NEGOTIATING); 2066 WARN_ON(con->state != CON_STATE_NEGOTIATING);
1684 con->state = CON_STATE_OPEN; 2067 con->state = CON_STATE_OPEN;
1685 2068 con->auth_retry = 0; /* we authenticated; clear flag */
1686 con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq); 2069 con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq);
1687 con->connect_seq++; 2070 con->connect_seq++;
1688 con->peer_features = server_feat; 2071 con->peer_features = server_feat;
@@ -1698,7 +2081,12 @@ static int process_connect(struct ceph_connection *con)
1698 2081
1699 con->delay = 0; /* reset backoff memory */ 2082 con->delay = 0; /* reset backoff memory */
1700 2083
1701 prepare_read_tag(con); 2084 if (con->in_reply.tag == CEPH_MSGR_TAG_SEQ) {
2085 prepare_write_seq(con);
2086 prepare_read_seq(con);
2087 } else {
2088 prepare_read_tag(con);
2089 }
1702 break; 2090 break;
1703 2091
1704 case CEPH_MSGR_TAG_WAIT: 2092 case CEPH_MSGR_TAG_WAIT:
@@ -1732,7 +2120,6 @@ static int read_partial_ack(struct ceph_connection *con)
1732 return read_partial(con, end, size, &con->in_temp_ack); 2120 return read_partial(con, end, size, &con->in_temp_ack);
1733} 2121}
1734 2122
1735
1736/* 2123/*
1737 * We can finally discard anything that's been acked. 2124 * We can finally discard anything that's been acked.
1738 */ 2125 */
@@ -1757,8 +2144,6 @@ static void process_ack(struct ceph_connection *con)
1757} 2144}
1758 2145
1759 2146
1760
1761
1762static int read_partial_message_section(struct ceph_connection *con, 2147static int read_partial_message_section(struct ceph_connection *con,
1763 struct kvec *section, 2148 struct kvec *section,
1764 unsigned int sec_len, u32 *crc) 2149 unsigned int sec_len, u32 *crc)
@@ -1782,77 +2167,49 @@ static int read_partial_message_section(struct ceph_connection *con,
1782 return 1; 2167 return 1;
1783} 2168}
1784 2169
1785static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip); 2170static int read_partial_msg_data(struct ceph_connection *con)
1786
1787static int read_partial_message_pages(struct ceph_connection *con,
1788 struct page **pages,
1789 unsigned int data_len, bool do_datacrc)
1790{ 2171{
1791 void *p; 2172 struct ceph_msg *msg = con->in_msg;
2173 struct ceph_msg_data_cursor *cursor = &msg->cursor;
2174 const bool do_datacrc = !con->msgr->nocrc;
2175 struct page *page;
2176 size_t page_offset;
2177 size_t length;
2178 u32 crc = 0;
1792 int ret; 2179 int ret;
1793 int left;
1794 2180
1795 left = min((int)(data_len - con->in_msg_pos.data_pos), 2181 BUG_ON(!msg);
1796 (int)(PAGE_SIZE - con->in_msg_pos.page_pos)); 2182 if (list_empty(&msg->data))
1797 /* (page) data */ 2183 return -EIO;
1798 BUG_ON(pages == NULL);
1799 p = kmap(pages[con->in_msg_pos.page]);
1800 ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos,
1801 left);
1802 if (ret > 0 && do_datacrc)
1803 con->in_data_crc =
1804 crc32c(con->in_data_crc,
1805 p + con->in_msg_pos.page_pos, ret);
1806 kunmap(pages[con->in_msg_pos.page]);
1807 if (ret <= 0)
1808 return ret;
1809 con->in_msg_pos.data_pos += ret;
1810 con->in_msg_pos.page_pos += ret;
1811 if (con->in_msg_pos.page_pos == PAGE_SIZE) {
1812 con->in_msg_pos.page_pos = 0;
1813 con->in_msg_pos.page++;
1814 }
1815
1816 return ret;
1817}
1818
1819#ifdef CONFIG_BLOCK
1820static int read_partial_message_bio(struct ceph_connection *con,
1821 struct bio **bio_iter, int *bio_seg,
1822 unsigned int data_len, bool do_datacrc)
1823{
1824 struct bio_vec *bv = bio_iovec_idx(*bio_iter, *bio_seg);
1825 void *p;
1826 int ret, left;
1827 2184
1828 left = min((int)(data_len - con->in_msg_pos.data_pos), 2185 if (do_datacrc)
1829 (int)(bv->bv_len - con->in_msg_pos.page_pos)); 2186 crc = con->in_data_crc;
2187 while (cursor->resid) {
2188 page = ceph_msg_data_next(&msg->cursor, &page_offset, &length,
2189 NULL);
2190 ret = ceph_tcp_recvpage(con->sock, page, page_offset, length);
2191 if (ret <= 0) {
2192 if (do_datacrc)
2193 con->in_data_crc = crc;
1830 2194
1831 p = kmap(bv->bv_page) + bv->bv_offset; 2195 return ret;
2196 }
1832 2197
1833 ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos, 2198 if (do_datacrc)
1834 left); 2199 crc = ceph_crc32c_page(crc, page, page_offset, ret);
1835 if (ret > 0 && do_datacrc) 2200 (void) ceph_msg_data_advance(&msg->cursor, (size_t)ret);
1836 con->in_data_crc =
1837 crc32c(con->in_data_crc,
1838 p + con->in_msg_pos.page_pos, ret);
1839 kunmap(bv->bv_page);
1840 if (ret <= 0)
1841 return ret;
1842 con->in_msg_pos.data_pos += ret;
1843 con->in_msg_pos.page_pos += ret;
1844 if (con->in_msg_pos.page_pos == bv->bv_len) {
1845 con->in_msg_pos.page_pos = 0;
1846 iter_bio_next(bio_iter, bio_seg);
1847 } 2201 }
2202 if (do_datacrc)
2203 con->in_data_crc = crc;
1848 2204
1849 return ret; 2205 return 1; /* must return > 0 to indicate success */
1850} 2206}
1851#endif
1852 2207
1853/* 2208/*
1854 * read (part of) a message. 2209 * read (part of) a message.
1855 */ 2210 */
2211static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip);
2212
1856static int read_partial_message(struct ceph_connection *con) 2213static int read_partial_message(struct ceph_connection *con)
1857{ 2214{
1858 struct ceph_msg *m = con->in_msg; 2215 struct ceph_msg *m = con->in_msg;
@@ -1885,7 +2242,7 @@ static int read_partial_message(struct ceph_connection *con)
1885 if (front_len > CEPH_MSG_MAX_FRONT_LEN) 2242 if (front_len > CEPH_MSG_MAX_FRONT_LEN)
1886 return -EIO; 2243 return -EIO;
1887 middle_len = le32_to_cpu(con->in_hdr.middle_len); 2244 middle_len = le32_to_cpu(con->in_hdr.middle_len);
1888 if (middle_len > CEPH_MSG_MAX_DATA_LEN) 2245 if (middle_len > CEPH_MSG_MAX_MIDDLE_LEN)
1889 return -EIO; 2246 return -EIO;
1890 data_len = le32_to_cpu(con->in_hdr.data_len); 2247 data_len = le32_to_cpu(con->in_hdr.data_len);
1891 if (data_len > CEPH_MSG_MAX_DATA_LEN) 2248 if (data_len > CEPH_MSG_MAX_DATA_LEN)
@@ -1914,14 +2271,22 @@ static int read_partial_message(struct ceph_connection *con)
1914 int skip = 0; 2271 int skip = 0;
1915 2272
1916 dout("got hdr type %d front %d data %d\n", con->in_hdr.type, 2273 dout("got hdr type %d front %d data %d\n", con->in_hdr.type,
1917 con->in_hdr.front_len, con->in_hdr.data_len); 2274 front_len, data_len);
1918 ret = ceph_con_in_msg_alloc(con, &skip); 2275 ret = ceph_con_in_msg_alloc(con, &skip);
1919 if (ret < 0) 2276 if (ret < 0)
1920 return ret; 2277 return ret;
2278
2279 BUG_ON(!con->in_msg ^ skip);
2280 if (con->in_msg && data_len > con->in_msg->data_length) {
2281 pr_warning("%s skipping long message (%u > %zd)\n",
2282 __func__, data_len, con->in_msg->data_length);
2283 ceph_msg_put(con->in_msg);
2284 con->in_msg = NULL;
2285 skip = 1;
2286 }
1921 if (skip) { 2287 if (skip) {
1922 /* skip this message */ 2288 /* skip this message */
1923 dout("alloc_msg said skip message\n"); 2289 dout("alloc_msg said skip message\n");
1924 BUG_ON(con->in_msg);
1925 con->in_base_pos = -front_len - middle_len - data_len - 2290 con->in_base_pos = -front_len - middle_len - data_len -
1926 sizeof(m->footer); 2291 sizeof(m->footer);
1927 con->in_tag = CEPH_MSGR_TAG_READY; 2292 con->in_tag = CEPH_MSGR_TAG_READY;
@@ -1936,17 +2301,10 @@ static int read_partial_message(struct ceph_connection *con)
1936 if (m->middle) 2301 if (m->middle)
1937 m->middle->vec.iov_len = 0; 2302 m->middle->vec.iov_len = 0;
1938 2303
1939 con->in_msg_pos.page = 0; 2304 /* prepare for data payload, if any */
1940 if (m->pages)
1941 con->in_msg_pos.page_pos = m->page_alignment;
1942 else
1943 con->in_msg_pos.page_pos = 0;
1944 con->in_msg_pos.data_pos = 0;
1945 2305
1946#ifdef CONFIG_BLOCK 2306 if (data_len)
1947 if (m->bio) 2307 prepare_message_data(con->in_msg, data_len);
1948 init_bio_iter(m->bio, &m->bio_iter, &m->bio_seg);
1949#endif
1950 } 2308 }
1951 2309
1952 /* front */ 2310 /* front */
@@ -1965,24 +2323,10 @@ static int read_partial_message(struct ceph_connection *con)
1965 } 2323 }
1966 2324
1967 /* (page) data */ 2325 /* (page) data */
1968 while (con->in_msg_pos.data_pos < data_len) { 2326 if (data_len) {
1969 if (m->pages) { 2327 ret = read_partial_msg_data(con);
1970 ret = read_partial_message_pages(con, m->pages, 2328 if (ret <= 0)
1971 data_len, do_datacrc); 2329 return ret;
1972 if (ret <= 0)
1973 return ret;
1974#ifdef CONFIG_BLOCK
1975 } else if (m->bio) {
1976 BUG_ON(!m->bio_iter);
1977 ret = read_partial_message_bio(con,
1978 &m->bio_iter, &m->bio_seg,
1979 data_len, do_datacrc);
1980 if (ret <= 0)
1981 return ret;
1982#endif
1983 } else {
1984 BUG_ON(1);
1985 }
1986 } 2330 }
1987 2331
1988 /* footer */ 2332 /* footer */
@@ -2108,13 +2452,13 @@ more_kvec:
2108 goto do_next; 2452 goto do_next;
2109 } 2453 }
2110 2454
2111 ret = write_partial_msg_pages(con); 2455 ret = write_partial_message_data(con);
2112 if (ret == 1) 2456 if (ret == 1)
2113 goto more_kvec; /* we need to send the footer, too! */ 2457 goto more_kvec; /* we need to send the footer, too! */
2114 if (ret == 0) 2458 if (ret == 0)
2115 goto out; 2459 goto out;
2116 if (ret < 0) { 2460 if (ret < 0) {
2117 dout("try_write write_partial_msg_pages err %d\n", 2461 dout("try_write write_partial_message_data err %d\n",
2118 ret); 2462 ret);
2119 goto out; 2463 goto out;
2120 } 2464 }
@@ -2266,7 +2610,12 @@ more:
2266 prepare_read_tag(con); 2610 prepare_read_tag(con);
2267 goto more; 2611 goto more;
2268 } 2612 }
2269 if (con->in_tag == CEPH_MSGR_TAG_ACK) { 2613 if (con->in_tag == CEPH_MSGR_TAG_ACK ||
2614 con->in_tag == CEPH_MSGR_TAG_SEQ) {
2615 /*
2616 * the final handshake seq exchange is semantically
2617 * equivalent to an ACK
2618 */
2270 ret = read_partial_ack(con); 2619 ret = read_partial_ack(con);
2271 if (ret <= 0) 2620 if (ret <= 0)
2272 goto out; 2621 goto out;
@@ -2672,6 +3021,88 @@ void ceph_con_keepalive(struct ceph_connection *con)
2672} 3021}
2673EXPORT_SYMBOL(ceph_con_keepalive); 3022EXPORT_SYMBOL(ceph_con_keepalive);
2674 3023
3024static struct ceph_msg_data *ceph_msg_data_create(enum ceph_msg_data_type type)
3025{
3026 struct ceph_msg_data *data;
3027
3028 if (WARN_ON(!ceph_msg_data_type_valid(type)))
3029 return NULL;
3030
3031 data = kmem_cache_zalloc(ceph_msg_data_cache, GFP_NOFS);
3032 if (data)
3033 data->type = type;
3034 INIT_LIST_HEAD(&data->links);
3035
3036 return data;
3037}
3038
3039static void ceph_msg_data_destroy(struct ceph_msg_data *data)
3040{
3041 if (!data)
3042 return;
3043
3044 WARN_ON(!list_empty(&data->links));
3045 if (data->type == CEPH_MSG_DATA_PAGELIST) {
3046 ceph_pagelist_release(data->pagelist);
3047 kfree(data->pagelist);
3048 }
3049 kmem_cache_free(ceph_msg_data_cache, data);
3050}
3051
3052void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages,
3053 size_t length, size_t alignment)
3054{
3055 struct ceph_msg_data *data;
3056
3057 BUG_ON(!pages);
3058 BUG_ON(!length);
3059
3060 data = ceph_msg_data_create(CEPH_MSG_DATA_PAGES);
3061 BUG_ON(!data);
3062 data->pages = pages;
3063 data->length = length;
3064 data->alignment = alignment & ~PAGE_MASK;
3065
3066 list_add_tail(&data->links, &msg->data);
3067 msg->data_length += length;
3068}
3069EXPORT_SYMBOL(ceph_msg_data_add_pages);
3070
3071void ceph_msg_data_add_pagelist(struct ceph_msg *msg,
3072 struct ceph_pagelist *pagelist)
3073{
3074 struct ceph_msg_data *data;
3075
3076 BUG_ON(!pagelist);
3077 BUG_ON(!pagelist->length);
3078
3079 data = ceph_msg_data_create(CEPH_MSG_DATA_PAGELIST);
3080 BUG_ON(!data);
3081 data->pagelist = pagelist;
3082
3083 list_add_tail(&data->links, &msg->data);
3084 msg->data_length += pagelist->length;
3085}
3086EXPORT_SYMBOL(ceph_msg_data_add_pagelist);
3087
3088#ifdef CONFIG_BLOCK
3089void ceph_msg_data_add_bio(struct ceph_msg *msg, struct bio *bio,
3090 size_t length)
3091{
3092 struct ceph_msg_data *data;
3093
3094 BUG_ON(!bio);
3095
3096 data = ceph_msg_data_create(CEPH_MSG_DATA_BIO);
3097 BUG_ON(!data);
3098 data->bio = bio;
3099 data->bio_length = length;
3100
3101 list_add_tail(&data->links, &msg->data);
3102 msg->data_length += length;
3103}
3104EXPORT_SYMBOL(ceph_msg_data_add_bio);
3105#endif /* CONFIG_BLOCK */
2675 3106
2676/* 3107/*
2677 * construct a new message with given type, size 3108 * construct a new message with given type, size
@@ -2682,49 +3113,20 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
2682{ 3113{
2683 struct ceph_msg *m; 3114 struct ceph_msg *m;
2684 3115
2685 m = kmalloc(sizeof(*m), flags); 3116 m = kmem_cache_zalloc(ceph_msg_cache, flags);
2686 if (m == NULL) 3117 if (m == NULL)
2687 goto out; 3118 goto out;
2688 kref_init(&m->kref);
2689 3119
2690 m->con = NULL;
2691 INIT_LIST_HEAD(&m->list_head);
2692
2693 m->hdr.tid = 0;
2694 m->hdr.type = cpu_to_le16(type); 3120 m->hdr.type = cpu_to_le16(type);
2695 m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT); 3121 m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT);
2696 m->hdr.version = 0;
2697 m->hdr.front_len = cpu_to_le32(front_len); 3122 m->hdr.front_len = cpu_to_le32(front_len);
2698 m->hdr.middle_len = 0;
2699 m->hdr.data_len = 0;
2700 m->hdr.data_off = 0;
2701 m->hdr.reserved = 0;
2702 m->footer.front_crc = 0;
2703 m->footer.middle_crc = 0;
2704 m->footer.data_crc = 0;
2705 m->footer.flags = 0;
2706 m->front_max = front_len;
2707 m->front_is_vmalloc = false;
2708 m->more_to_follow = false;
2709 m->ack_stamp = 0;
2710 m->pool = NULL;
2711
2712 /* middle */
2713 m->middle = NULL;
2714 3123
2715 /* data */ 3124 INIT_LIST_HEAD(&m->list_head);
2716 m->nr_pages = 0; 3125 kref_init(&m->kref);
2717 m->page_alignment = 0; 3126 INIT_LIST_HEAD(&m->data);
2718 m->pages = NULL;
2719 m->pagelist = NULL;
2720#ifdef CONFIG_BLOCK
2721 m->bio = NULL;
2722 m->bio_iter = NULL;
2723 m->bio_seg = 0;
2724#endif /* CONFIG_BLOCK */
2725 m->trail = NULL;
2726 3127
2727 /* front */ 3128 /* front */
3129 m->front_max = front_len;
2728 if (front_len) { 3130 if (front_len) {
2729 if (front_len > PAGE_CACHE_SIZE) { 3131 if (front_len > PAGE_CACHE_SIZE) {
2730 m->front.iov_base = __vmalloc(front_len, flags, 3132 m->front.iov_base = __vmalloc(front_len, flags,
@@ -2802,49 +3204,37 @@ static int ceph_alloc_middle(struct ceph_connection *con, struct ceph_msg *msg)
2802static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip) 3204static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip)
2803{ 3205{
2804 struct ceph_msg_header *hdr = &con->in_hdr; 3206 struct ceph_msg_header *hdr = &con->in_hdr;
2805 int type = le16_to_cpu(hdr->type);
2806 int front_len = le32_to_cpu(hdr->front_len);
2807 int middle_len = le32_to_cpu(hdr->middle_len); 3207 int middle_len = le32_to_cpu(hdr->middle_len);
3208 struct ceph_msg *msg;
2808 int ret = 0; 3209 int ret = 0;
2809 3210
2810 BUG_ON(con->in_msg != NULL); 3211 BUG_ON(con->in_msg != NULL);
3212 BUG_ON(!con->ops->alloc_msg);
2811 3213
2812 if (con->ops->alloc_msg) { 3214 mutex_unlock(&con->mutex);
2813 struct ceph_msg *msg; 3215 msg = con->ops->alloc_msg(con, hdr, skip);
2814 3216 mutex_lock(&con->mutex);
2815 mutex_unlock(&con->mutex); 3217 if (con->state != CON_STATE_OPEN) {
2816 msg = con->ops->alloc_msg(con, hdr, skip); 3218 if (msg)
2817 mutex_lock(&con->mutex); 3219 ceph_msg_put(msg);
2818 if (con->state != CON_STATE_OPEN) { 3220 return -EAGAIN;
2819 if (msg)
2820 ceph_msg_put(msg);
2821 return -EAGAIN;
2822 }
2823 con->in_msg = msg;
2824 if (con->in_msg) {
2825 con->in_msg->con = con->ops->get(con);
2826 BUG_ON(con->in_msg->con == NULL);
2827 }
2828 if (*skip) {
2829 con->in_msg = NULL;
2830 return 0;
2831 }
2832 if (!con->in_msg) {
2833 con->error_msg =
2834 "error allocating memory for incoming message";
2835 return -ENOMEM;
2836 }
2837 } 3221 }
2838 if (!con->in_msg) { 3222 if (msg) {
2839 con->in_msg = ceph_msg_new(type, front_len, GFP_NOFS, false); 3223 BUG_ON(*skip);
2840 if (!con->in_msg) { 3224 con->in_msg = msg;
2841 pr_err("unable to allocate msg type %d len %d\n",
2842 type, front_len);
2843 return -ENOMEM;
2844 }
2845 con->in_msg->con = con->ops->get(con); 3225 con->in_msg->con = con->ops->get(con);
2846 BUG_ON(con->in_msg->con == NULL); 3226 BUG_ON(con->in_msg->con == NULL);
2847 con->in_msg->page_alignment = le16_to_cpu(hdr->data_off); 3227 } else {
3228 /*
3229 * Null message pointer means either we should skip
3230 * this message or we couldn't allocate memory. The
3231 * former is not an error.
3232 */
3233 if (*skip)
3234 return 0;
3235 con->error_msg = "error allocating memory for incoming message";
3236
3237 return -ENOMEM;
2848 } 3238 }
2849 memcpy(&con->in_msg->hdr, &con->in_hdr, sizeof(con->in_hdr)); 3239 memcpy(&con->in_msg->hdr, &con->in_hdr, sizeof(con->in_hdr));
2850 3240
@@ -2870,7 +3260,7 @@ void ceph_msg_kfree(struct ceph_msg *m)
2870 vfree(m->front.iov_base); 3260 vfree(m->front.iov_base);
2871 else 3261 else
2872 kfree(m->front.iov_base); 3262 kfree(m->front.iov_base);
2873 kfree(m); 3263 kmem_cache_free(ceph_msg_cache, m);
2874} 3264}
2875 3265
2876/* 3266/*
@@ -2879,6 +3269,9 @@ void ceph_msg_kfree(struct ceph_msg *m)
2879void ceph_msg_last_put(struct kref *kref) 3269void ceph_msg_last_put(struct kref *kref)
2880{ 3270{
2881 struct ceph_msg *m = container_of(kref, struct ceph_msg, kref); 3271 struct ceph_msg *m = container_of(kref, struct ceph_msg, kref);
3272 LIST_HEAD(data);
3273 struct list_head *links;
3274 struct list_head *next;
2882 3275
2883 dout("ceph_msg_put last one on %p\n", m); 3276 dout("ceph_msg_put last one on %p\n", m);
2884 WARN_ON(!list_empty(&m->list_head)); 3277 WARN_ON(!list_empty(&m->list_head));
@@ -2888,16 +3281,16 @@ void ceph_msg_last_put(struct kref *kref)
2888 ceph_buffer_put(m->middle); 3281 ceph_buffer_put(m->middle);
2889 m->middle = NULL; 3282 m->middle = NULL;
2890 } 3283 }
2891 m->nr_pages = 0;
2892 m->pages = NULL;
2893 3284
2894 if (m->pagelist) { 3285 list_splice_init(&m->data, &data);
2895 ceph_pagelist_release(m->pagelist); 3286 list_for_each_safe(links, next, &data) {
2896 kfree(m->pagelist); 3287 struct ceph_msg_data *data;
2897 m->pagelist = NULL;
2898 }
2899 3288
2900 m->trail = NULL; 3289 data = list_entry(links, struct ceph_msg_data, links);
3290 list_del_init(links);
3291 ceph_msg_data_destroy(data);
3292 }
3293 m->data_length = 0;
2901 3294
2902 if (m->pool) 3295 if (m->pool)
2903 ceph_msgpool_put(m->pool, m); 3296 ceph_msgpool_put(m->pool, m);
@@ -2908,8 +3301,8 @@ EXPORT_SYMBOL(ceph_msg_last_put);
2908 3301
2909void ceph_msg_dump(struct ceph_msg *msg) 3302void ceph_msg_dump(struct ceph_msg *msg)
2910{ 3303{
2911 pr_debug("msg_dump %p (front_max %d nr_pages %d)\n", msg, 3304 pr_debug("msg_dump %p (front_max %d length %zd)\n", msg,
2912 msg->front_max, msg->nr_pages); 3305 msg->front_max, msg->data_length);
2913 print_hex_dump(KERN_DEBUG, "header: ", 3306 print_hex_dump(KERN_DEBUG, "header: ",
2914 DUMP_PREFIX_OFFSET, 16, 1, 3307 DUMP_PREFIX_OFFSET, 16, 1,
2915 &msg->hdr, sizeof(msg->hdr), true); 3308 &msg->hdr, sizeof(msg->hdr), true);
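
The messenger hunks above replace struct ceph_msg's dedicated pages/pagelist/bio/trail fields with a list of ceph_msg_data descriptors, which write_partial_message_data() and read_partial_msg_data() now walk through the cursor helpers (ceph_msg_data_next()/ceph_msg_data_advance()). A minimal caller-side sketch of attaching a payload with the new API follows; it is illustrative only, not part of the patch, and the message type, front length and error handling are placeholder assumptions:

/*
 * Sketch: build a message and attach a page vector as its data
 * payload using ceph_msg_data_add_pages() as defined above.
 * CEPH_MSG_OSD_OP and the PAGE_SIZE front length are assumptions.
 */
static struct ceph_msg *example_msg_with_pages(struct page **pages,
					       size_t length)
{
	struct ceph_msg *msg;

	msg = ceph_msg_new(CEPH_MSG_OSD_OP, PAGE_SIZE, GFP_NOFS, false);
	if (!msg)
		return NULL;

	/* length must be non-zero; ceph_msg_data_add_pages() BUGs on 0 */
	ceph_msg_data_add_pages(msg, pages, length, 0);

	/* msg->data_length now accounts for the attached pages */
	return msg;
}
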
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index aef5b1062bee..1fe25cd29d0e 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -737,7 +737,7 @@ static void delayed_work(struct work_struct *work)
737 737
738 __validate_auth(monc); 738 __validate_auth(monc);
739 739
740 if (monc->auth->ops->is_authenticated(monc->auth)) 740 if (ceph_auth_is_authenticated(monc->auth))
741 __send_subscribe(monc); 741 __send_subscribe(monc);
742 } 742 }
743 __schedule_delayed(monc); 743 __schedule_delayed(monc);
@@ -892,8 +892,7 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
892 892
893 mutex_lock(&monc->mutex); 893 mutex_lock(&monc->mutex);
894 had_debugfs_info = have_debugfs_info(monc); 894 had_debugfs_info = have_debugfs_info(monc);
895 if (monc->auth->ops) 895 was_auth = ceph_auth_is_authenticated(monc->auth);
896 was_auth = monc->auth->ops->is_authenticated(monc->auth);
897 monc->pending_auth = 0; 896 monc->pending_auth = 0;
898 ret = ceph_handle_auth_reply(monc->auth, msg->front.iov_base, 897 ret = ceph_handle_auth_reply(monc->auth, msg->front.iov_base,
899 msg->front.iov_len, 898 msg->front.iov_len,
@@ -904,7 +903,7 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
904 wake_up_all(&monc->client->auth_wq); 903 wake_up_all(&monc->client->auth_wq);
905 } else if (ret > 0) { 904 } else if (ret > 0) {
906 __send_prepared_auth_request(monc, ret); 905 __send_prepared_auth_request(monc, ret);
907 } else if (!was_auth && monc->auth->ops->is_authenticated(monc->auth)) { 906 } else if (!was_auth && ceph_auth_is_authenticated(monc->auth)) {
908 dout("authenticated, starting session\n"); 907 dout("authenticated, starting session\n");
909 908
910 monc->client->msgr.inst.name.type = CEPH_ENTITY_TYPE_CLIENT; 909 monc->client->msgr.inst.name.type = CEPH_ENTITY_TYPE_CLIENT;
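
On the monitor client side, call sites stop dereferencing monc->auth->ops directly (including the NULL check that used to guard it) and go through the ceph_auth_is_authenticated() helper introduced elsewhere in this series. Its definition is not shown in this section, so the following is only a hedged sketch of the shape such a wrapper takes; the real body may differ, for instance in its locking:

/*
 * Hedged sketch of a null-safe authentication check; the helper's
 * name appears in the call sites above, but this body is an
 * assumption.  The point is that the ops NULL check moves out of
 * every caller and into one place.
 */
static inline int example_auth_is_authenticated(struct ceph_auth_client *ac)
{
	if (!ac->ops || !ac->ops->is_authenticated)
		return 0;

	return ac->ops->is_authenticated(ac);
}
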
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index d730dd4d8eb2..a3395fdfbd4f 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1,3 +1,4 @@
1
1#include <linux/ceph/ceph_debug.h> 2#include <linux/ceph/ceph_debug.h>
2 3
3#include <linux/module.h> 4#include <linux/module.h>
@@ -21,6 +22,8 @@
21#define OSD_OP_FRONT_LEN 4096 22#define OSD_OP_FRONT_LEN 4096
22#define OSD_OPREPLY_FRONT_LEN 512 23#define OSD_OPREPLY_FRONT_LEN 512
23 24
25static struct kmem_cache *ceph_osd_request_cache;
26
24static const struct ceph_connection_operations osd_con_ops; 27static const struct ceph_connection_operations osd_con_ops;
25 28
26static void __send_queued(struct ceph_osd_client *osdc); 29static void __send_queued(struct ceph_osd_client *osdc);
@@ -32,12 +35,6 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc,
32static void __send_request(struct ceph_osd_client *osdc, 35static void __send_request(struct ceph_osd_client *osdc,
33 struct ceph_osd_request *req); 36 struct ceph_osd_request *req);
34 37
35static int op_has_extent(int op)
36{
37 return (op == CEPH_OSD_OP_READ ||
38 op == CEPH_OSD_OP_WRITE);
39}
40
41/* 38/*
42 * Implement client access to distributed object storage cluster. 39 * Implement client access to distributed object storage cluster.
43 * 40 *
@@ -63,53 +60,238 @@ static int op_has_extent(int op)
63 * 60 *
64 * fill osd op in request message. 61 * fill osd op in request message.
65 */ 62 */
66static int calc_layout(struct ceph_vino vino, 63static int calc_layout(struct ceph_file_layout *layout, u64 off, u64 *plen,
67 struct ceph_file_layout *layout, 64 u64 *objnum, u64 *objoff, u64 *objlen)
68 u64 off, u64 *plen,
69 struct ceph_osd_request *req,
70 struct ceph_osd_req_op *op)
71{ 65{
72 u64 orig_len = *plen; 66 u64 orig_len = *plen;
73 u64 bno = 0;
74 u64 objoff = 0;
75 u64 objlen = 0;
76 int r; 67 int r;
77 68
78 /* object extent? */ 69 /* object extent? */
79 r = ceph_calc_file_object_mapping(layout, off, orig_len, &bno, 70 r = ceph_calc_file_object_mapping(layout, off, orig_len, objnum,
80 &objoff, &objlen); 71 objoff, objlen);
81 if (r < 0) 72 if (r < 0)
82 return r; 73 return r;
83 if (objlen < orig_len) { 74 if (*objlen < orig_len) {
84 *plen = objlen; 75 *plen = *objlen;
85 dout(" skipping last %llu, final file extent %llu~%llu\n", 76 dout(" skipping last %llu, final file extent %llu~%llu\n",
86 orig_len - *plen, off, *plen); 77 orig_len - *plen, off, *plen);
87 } 78 }
88 79
89 if (op_has_extent(op->op)) { 80 dout("calc_layout objnum=%llx %llu~%llu\n", *objnum, *objoff, *objlen);
90 u32 osize = le32_to_cpu(layout->fl_object_size); 81
91 op->extent.offset = objoff; 82 return 0;
92 op->extent.length = objlen; 83}
93 if (op->extent.truncate_size <= off - objoff) { 84
94 op->extent.truncate_size = 0; 85static void ceph_osd_data_init(struct ceph_osd_data *osd_data)
95 } else { 86{
96 op->extent.truncate_size -= off - objoff; 87 memset(osd_data, 0, sizeof (*osd_data));
97 if (op->extent.truncate_size > osize) 88 osd_data->type = CEPH_OSD_DATA_TYPE_NONE;
98 op->extent.truncate_size = osize; 89}
99 } 90
91static void ceph_osd_data_pages_init(struct ceph_osd_data *osd_data,
92 struct page **pages, u64 length, u32 alignment,
93 bool pages_from_pool, bool own_pages)
94{
95 osd_data->type = CEPH_OSD_DATA_TYPE_PAGES;
96 osd_data->pages = pages;
97 osd_data->length = length;
98 osd_data->alignment = alignment;
99 osd_data->pages_from_pool = pages_from_pool;
100 osd_data->own_pages = own_pages;
101}
102
103static void ceph_osd_data_pagelist_init(struct ceph_osd_data *osd_data,
104 struct ceph_pagelist *pagelist)
105{
106 osd_data->type = CEPH_OSD_DATA_TYPE_PAGELIST;
107 osd_data->pagelist = pagelist;
108}
109
110#ifdef CONFIG_BLOCK
111static void ceph_osd_data_bio_init(struct ceph_osd_data *osd_data,
112 struct bio *bio, size_t bio_length)
113{
114 osd_data->type = CEPH_OSD_DATA_TYPE_BIO;
115 osd_data->bio = bio;
116 osd_data->bio_length = bio_length;
117}
118#endif /* CONFIG_BLOCK */
119
120#define osd_req_op_data(oreq, whch, typ, fld) \
121 ({ \
122 BUG_ON(whch >= (oreq)->r_num_ops); \
123 &(oreq)->r_ops[whch].typ.fld; \
124 })
125
126static struct ceph_osd_data *
127osd_req_op_raw_data_in(struct ceph_osd_request *osd_req, unsigned int which)
128{
129 BUG_ON(which >= osd_req->r_num_ops);
130
131 return &osd_req->r_ops[which].raw_data_in;
132}
133
134struct ceph_osd_data *
135osd_req_op_extent_osd_data(struct ceph_osd_request *osd_req,
136 unsigned int which)
137{
138 return osd_req_op_data(osd_req, which, extent, osd_data);
139}
140EXPORT_SYMBOL(osd_req_op_extent_osd_data);
141
142struct ceph_osd_data *
143osd_req_op_cls_response_data(struct ceph_osd_request *osd_req,
144 unsigned int which)
145{
146 return osd_req_op_data(osd_req, which, cls, response_data);
147}
148EXPORT_SYMBOL(osd_req_op_cls_response_data); /* ??? */
149
150void osd_req_op_raw_data_in_pages(struct ceph_osd_request *osd_req,
151 unsigned int which, struct page **pages,
152 u64 length, u32 alignment,
153 bool pages_from_pool, bool own_pages)
154{
155 struct ceph_osd_data *osd_data;
156
157 osd_data = osd_req_op_raw_data_in(osd_req, which);
158 ceph_osd_data_pages_init(osd_data, pages, length, alignment,
159 pages_from_pool, own_pages);
160}
161EXPORT_SYMBOL(osd_req_op_raw_data_in_pages);
162
163void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *osd_req,
164 unsigned int which, struct page **pages,
165 u64 length, u32 alignment,
166 bool pages_from_pool, bool own_pages)
167{
168 struct ceph_osd_data *osd_data;
169
170 osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
171 ceph_osd_data_pages_init(osd_data, pages, length, alignment,
172 pages_from_pool, own_pages);
173}
174EXPORT_SYMBOL(osd_req_op_extent_osd_data_pages);
175
176void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *osd_req,
177 unsigned int which, struct ceph_pagelist *pagelist)
178{
179 struct ceph_osd_data *osd_data;
180
181 osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
182 ceph_osd_data_pagelist_init(osd_data, pagelist);
183}
184EXPORT_SYMBOL(osd_req_op_extent_osd_data_pagelist);
185
186#ifdef CONFIG_BLOCK
187void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req,
188 unsigned int which, struct bio *bio, size_t bio_length)
189{
190 struct ceph_osd_data *osd_data;
191
192 osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
193 ceph_osd_data_bio_init(osd_data, bio, bio_length);
194}
195EXPORT_SYMBOL(osd_req_op_extent_osd_data_bio);
196#endif /* CONFIG_BLOCK */
197
198static void osd_req_op_cls_request_info_pagelist(
199 struct ceph_osd_request *osd_req,
200 unsigned int which, struct ceph_pagelist *pagelist)
201{
202 struct ceph_osd_data *osd_data;
203
204 osd_data = osd_req_op_data(osd_req, which, cls, request_info);
205 ceph_osd_data_pagelist_init(osd_data, pagelist);
206}
207
208void osd_req_op_cls_request_data_pagelist(
209 struct ceph_osd_request *osd_req,
210 unsigned int which, struct ceph_pagelist *pagelist)
211{
212 struct ceph_osd_data *osd_data;
213
214 osd_data = osd_req_op_data(osd_req, which, cls, request_data);
215 ceph_osd_data_pagelist_init(osd_data, pagelist);
216}
217EXPORT_SYMBOL(osd_req_op_cls_request_data_pagelist);
218
219void osd_req_op_cls_request_data_pages(struct ceph_osd_request *osd_req,
220 unsigned int which, struct page **pages, u64 length,
221 u32 alignment, bool pages_from_pool, bool own_pages)
222{
223 struct ceph_osd_data *osd_data;
224
225 osd_data = osd_req_op_data(osd_req, which, cls, request_data);
226 ceph_osd_data_pages_init(osd_data, pages, length, alignment,
227 pages_from_pool, own_pages);
228}
229EXPORT_SYMBOL(osd_req_op_cls_request_data_pages);
230
231void osd_req_op_cls_response_data_pages(struct ceph_osd_request *osd_req,
232 unsigned int which, struct page **pages, u64 length,
233 u32 alignment, bool pages_from_pool, bool own_pages)
234{
235 struct ceph_osd_data *osd_data;
236
237 osd_data = osd_req_op_data(osd_req, which, cls, response_data);
238 ceph_osd_data_pages_init(osd_data, pages, length, alignment,
239 pages_from_pool, own_pages);
240}
241EXPORT_SYMBOL(osd_req_op_cls_response_data_pages);
242
243static u64 ceph_osd_data_length(struct ceph_osd_data *osd_data)
244{
245 switch (osd_data->type) {
246 case CEPH_OSD_DATA_TYPE_NONE:
247 return 0;
248 case CEPH_OSD_DATA_TYPE_PAGES:
249 return osd_data->length;
250 case CEPH_OSD_DATA_TYPE_PAGELIST:
251 return (u64)osd_data->pagelist->length;
252#ifdef CONFIG_BLOCK
253 case CEPH_OSD_DATA_TYPE_BIO:
254 return (u64)osd_data->bio_length;
255#endif /* CONFIG_BLOCK */
256 default:
257 WARN(true, "unrecognized data type %d\n", (int)osd_data->type);
258 return 0;
100 } 259 }
101 req->r_num_pages = calc_pages_for(off, *plen); 260}
102 req->r_page_alignment = off & ~PAGE_MASK;
103 if (op->op == CEPH_OSD_OP_WRITE)
104 op->payload_len = *plen;
105 261
106 dout("calc_layout bno=%llx %llu~%llu (%d pages)\n", 262static void ceph_osd_data_release(struct ceph_osd_data *osd_data)
107 bno, objoff, objlen, req->r_num_pages); 263{
264 if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES && osd_data->own_pages) {
265 int num_pages;
108 266
109 snprintf(req->r_oid, sizeof(req->r_oid), "%llx.%08llx", vino.ino, bno); 267 num_pages = calc_pages_for((u64)osd_data->alignment,
110 req->r_oid_len = strlen(req->r_oid); 268 (u64)osd_data->length);
269 ceph_release_page_vector(osd_data->pages, num_pages);
270 }
271 ceph_osd_data_init(osd_data);
272}
273
274static void osd_req_op_data_release(struct ceph_osd_request *osd_req,
275 unsigned int which)
276{
277 struct ceph_osd_req_op *op;
278
279 BUG_ON(which >= osd_req->r_num_ops);
280 op = &osd_req->r_ops[which];
111 281
112 return r; 282 switch (op->op) {
283 case CEPH_OSD_OP_READ:
284 case CEPH_OSD_OP_WRITE:
285 ceph_osd_data_release(&op->extent.osd_data);
286 break;
287 case CEPH_OSD_OP_CALL:
288 ceph_osd_data_release(&op->cls.request_info);
289 ceph_osd_data_release(&op->cls.request_data);
290 ceph_osd_data_release(&op->cls.response_data);
291 break;
292 default:
293 break;
294 }
113} 295}
114 296
115/* 297/*
@@ -117,30 +299,26 @@ static int calc_layout(struct ceph_vino vino,
117 */ 299 */
118void ceph_osdc_release_request(struct kref *kref) 300void ceph_osdc_release_request(struct kref *kref)
119{ 301{
120 struct ceph_osd_request *req = container_of(kref, 302 struct ceph_osd_request *req;
121 struct ceph_osd_request, 303 unsigned int which;
122 r_kref);
123 304
305 req = container_of(kref, struct ceph_osd_request, r_kref);
124 if (req->r_request) 306 if (req->r_request)
125 ceph_msg_put(req->r_request); 307 ceph_msg_put(req->r_request);
126 if (req->r_con_filling_msg) { 308 if (req->r_reply) {
127 dout("%s revoking msg %p from con %p\n", __func__,
128 req->r_reply, req->r_con_filling_msg);
129 ceph_msg_revoke_incoming(req->r_reply); 309 ceph_msg_revoke_incoming(req->r_reply);
130 req->r_con_filling_msg->ops->put(req->r_con_filling_msg);
131 req->r_con_filling_msg = NULL;
132 }
133 if (req->r_reply)
134 ceph_msg_put(req->r_reply); 310 ceph_msg_put(req->r_reply);
135 if (req->r_own_pages) 311 }
136 ceph_release_page_vector(req->r_pages, 312
137 req->r_num_pages); 313 for (which = 0; which < req->r_num_ops; which++)
314 osd_req_op_data_release(req, which);
315
138 ceph_put_snap_context(req->r_snapc); 316 ceph_put_snap_context(req->r_snapc);
139 ceph_pagelist_release(&req->r_trail);
140 if (req->r_mempool) 317 if (req->r_mempool)
141 mempool_free(req, req->r_osdc->req_mempool); 318 mempool_free(req, req->r_osdc->req_mempool);
142 else 319 else
143 kfree(req); 320 kmem_cache_free(ceph_osd_request_cache, req);
321
144} 322}
145EXPORT_SYMBOL(ceph_osdc_release_request); 323EXPORT_SYMBOL(ceph_osdc_release_request);
146 324
@@ -154,6 +332,9 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
154 struct ceph_msg *msg; 332 struct ceph_msg *msg;
155 size_t msg_size; 333 size_t msg_size;
156 334
335 BUILD_BUG_ON(CEPH_OSD_MAX_OP > U16_MAX);
336 BUG_ON(num_ops > CEPH_OSD_MAX_OP);
337
157 msg_size = 4 + 4 + 8 + 8 + 4+8; 338 msg_size = 4 + 4 + 8 + 8 + 4+8;
158 msg_size += 2 + 4 + 8 + 4 + 4; /* oloc */ 339 msg_size += 2 + 4 + 8 + 4 + 4; /* oloc */
159 msg_size += 1 + 8 + 4 + 4; /* pg_t */ 340 msg_size += 1 + 8 + 4 + 4; /* pg_t */
@@ -168,13 +349,14 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
168 req = mempool_alloc(osdc->req_mempool, gfp_flags); 349 req = mempool_alloc(osdc->req_mempool, gfp_flags);
169 memset(req, 0, sizeof(*req)); 350 memset(req, 0, sizeof(*req));
170 } else { 351 } else {
171 req = kzalloc(sizeof(*req), gfp_flags); 352 req = kmem_cache_zalloc(ceph_osd_request_cache, gfp_flags);
172 } 353 }
173 if (req == NULL) 354 if (req == NULL)
174 return NULL; 355 return NULL;
175 356
176 req->r_osdc = osdc; 357 req->r_osdc = osdc;
177 req->r_mempool = use_mempool; 358 req->r_mempool = use_mempool;
359 req->r_num_ops = num_ops;
178 360
179 kref_init(&req->r_kref); 361 kref_init(&req->r_kref);
180 init_completion(&req->r_completion); 362 init_completion(&req->r_completion);
@@ -198,8 +380,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
198 } 380 }
199 req->r_reply = msg; 381 req->r_reply = msg;
200 382
201 ceph_pagelist_init(&req->r_trail);
202
203 /* create request message; allow space for oid */ 383 /* create request message; allow space for oid */
204 if (use_mempool) 384 if (use_mempool)
205 msg = ceph_msgpool_get(&osdc->msgpool_op, 0); 385 msg = ceph_msgpool_get(&osdc->msgpool_op, 0);
@@ -218,60 +398,24 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
218} 398}
219EXPORT_SYMBOL(ceph_osdc_alloc_request); 399EXPORT_SYMBOL(ceph_osdc_alloc_request);
220 400
221static void osd_req_encode_op(struct ceph_osd_request *req, 401static bool osd_req_opcode_valid(u16 opcode)
222 struct ceph_osd_op *dst,
223 struct ceph_osd_req_op *src)
224{ 402{
225 dst->op = cpu_to_le16(src->op); 403 switch (opcode) {
226
227 switch (src->op) {
228 case CEPH_OSD_OP_STAT:
229 break;
230 case CEPH_OSD_OP_READ: 404 case CEPH_OSD_OP_READ:
231 case CEPH_OSD_OP_WRITE: 405 case CEPH_OSD_OP_STAT:
232 dst->extent.offset =
233 cpu_to_le64(src->extent.offset);
234 dst->extent.length =
235 cpu_to_le64(src->extent.length);
236 dst->extent.truncate_size =
237 cpu_to_le64(src->extent.truncate_size);
238 dst->extent.truncate_seq =
239 cpu_to_le32(src->extent.truncate_seq);
240 break;
241 case CEPH_OSD_OP_CALL:
242 dst->cls.class_len = src->cls.class_len;
243 dst->cls.method_len = src->cls.method_len;
244 dst->cls.indata_len = cpu_to_le32(src->cls.indata_len);
245
246 ceph_pagelist_append(&req->r_trail, src->cls.class_name,
247 src->cls.class_len);
248 ceph_pagelist_append(&req->r_trail, src->cls.method_name,
249 src->cls.method_len);
250 ceph_pagelist_append(&req->r_trail, src->cls.indata,
251 src->cls.indata_len);
252 break;
253 case CEPH_OSD_OP_STARTSYNC:
254 break;
255 case CEPH_OSD_OP_NOTIFY_ACK:
256 case CEPH_OSD_OP_WATCH:
257 dst->watch.cookie = cpu_to_le64(src->watch.cookie);
258 dst->watch.ver = cpu_to_le64(src->watch.ver);
259 dst->watch.flag = src->watch.flag;
260 break;
261 default:
262 pr_err("unrecognized osd opcode %d\n", dst->op);
263 WARN_ON(1);
264 break;
265 case CEPH_OSD_OP_MAPEXT: 406 case CEPH_OSD_OP_MAPEXT:
266 case CEPH_OSD_OP_MASKTRUNC: 407 case CEPH_OSD_OP_MASKTRUNC:
267 case CEPH_OSD_OP_SPARSE_READ: 408 case CEPH_OSD_OP_SPARSE_READ:
268 case CEPH_OSD_OP_NOTIFY: 409 case CEPH_OSD_OP_NOTIFY:
410 case CEPH_OSD_OP_NOTIFY_ACK:
269 case CEPH_OSD_OP_ASSERT_VER: 411 case CEPH_OSD_OP_ASSERT_VER:
412 case CEPH_OSD_OP_WRITE:
270 case CEPH_OSD_OP_WRITEFULL: 413 case CEPH_OSD_OP_WRITEFULL:
271 case CEPH_OSD_OP_TRUNCATE: 414 case CEPH_OSD_OP_TRUNCATE:
272 case CEPH_OSD_OP_ZERO: 415 case CEPH_OSD_OP_ZERO:
273 case CEPH_OSD_OP_DELETE: 416 case CEPH_OSD_OP_DELETE:
274 case CEPH_OSD_OP_APPEND: 417 case CEPH_OSD_OP_APPEND:
418 case CEPH_OSD_OP_STARTSYNC:
275 case CEPH_OSD_OP_SETTRUNC: 419 case CEPH_OSD_OP_SETTRUNC:
276 case CEPH_OSD_OP_TRIMTRUNC: 420 case CEPH_OSD_OP_TRIMTRUNC:
277 case CEPH_OSD_OP_TMAPUP: 421 case CEPH_OSD_OP_TMAPUP:
@@ -279,11 +423,11 @@ static void osd_req_encode_op(struct ceph_osd_request *req,
279 case CEPH_OSD_OP_TMAPGET: 423 case CEPH_OSD_OP_TMAPGET:
280 case CEPH_OSD_OP_CREATE: 424 case CEPH_OSD_OP_CREATE:
281 case CEPH_OSD_OP_ROLLBACK: 425 case CEPH_OSD_OP_ROLLBACK:
426 case CEPH_OSD_OP_WATCH:
282 case CEPH_OSD_OP_OMAPGETKEYS: 427 case CEPH_OSD_OP_OMAPGETKEYS:
283 case CEPH_OSD_OP_OMAPGETVALS: 428 case CEPH_OSD_OP_OMAPGETVALS:
284 case CEPH_OSD_OP_OMAPGETHEADER: 429 case CEPH_OSD_OP_OMAPGETHEADER:
285 case CEPH_OSD_OP_OMAPGETVALSBYKEYS: 430 case CEPH_OSD_OP_OMAPGETVALSBYKEYS:
286 case CEPH_OSD_OP_MODE_RD:
287 case CEPH_OSD_OP_OMAPSETVALS: 431 case CEPH_OSD_OP_OMAPSETVALS:
288 case CEPH_OSD_OP_OMAPSETHEADER: 432 case CEPH_OSD_OP_OMAPSETHEADER:
289 case CEPH_OSD_OP_OMAPCLEAR: 433 case CEPH_OSD_OP_OMAPCLEAR:
@@ -314,113 +458,233 @@ static void osd_req_encode_op(struct ceph_osd_request *req,
314 case CEPH_OSD_OP_RDUNLOCK: 458 case CEPH_OSD_OP_RDUNLOCK:
315 case CEPH_OSD_OP_UPLOCK: 459 case CEPH_OSD_OP_UPLOCK:
316 case CEPH_OSD_OP_DNLOCK: 460 case CEPH_OSD_OP_DNLOCK:
461 case CEPH_OSD_OP_CALL:
317 case CEPH_OSD_OP_PGLS: 462 case CEPH_OSD_OP_PGLS:
318 case CEPH_OSD_OP_PGLS_FILTER: 463 case CEPH_OSD_OP_PGLS_FILTER:
319 pr_err("unsupported osd opcode %s\n", 464 return true;
320 ceph_osd_op_name(dst->op)); 465 default:
321 WARN_ON(1); 466 return false;
322 break;
323 } 467 }
324 dst->payload_len = cpu_to_le32(src->payload_len);
325} 468}
326 469
327/* 470/*
328 * build new request AND message 471 * This is an osd op init function for opcodes that have no data or
329 * 472 * other information associated with them. It also serves as a
473 * common init routine for all the other init functions, below.
330 */ 474 */
331void ceph_osdc_build_request(struct ceph_osd_request *req, 475static struct ceph_osd_req_op *
332 u64 off, u64 len, unsigned int num_ops, 476_osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which,
333 struct ceph_osd_req_op *src_ops, 477 u16 opcode)
334 struct ceph_snap_context *snapc, u64 snap_id,
335 struct timespec *mtime)
336{ 478{
337 struct ceph_msg *msg = req->r_request; 479 struct ceph_osd_req_op *op;
338 struct ceph_osd_req_op *src_op;
339 void *p;
340 size_t msg_size;
341 int flags = req->r_flags;
342 u64 data_len;
343 int i;
344 480
345 req->r_num_ops = num_ops; 481 BUG_ON(which >= osd_req->r_num_ops);
346 req->r_snapid = snap_id; 482 BUG_ON(!osd_req_opcode_valid(opcode));
347 req->r_snapc = ceph_get_snap_context(snapc);
348 483
349 /* encode request */ 484 op = &osd_req->r_ops[which];
350 msg->hdr.version = cpu_to_le16(4); 485 memset(op, 0, sizeof (*op));
486 op->op = opcode;
351 487
352 p = msg->front.iov_base; 488 return op;
353 ceph_encode_32(&p, 1); /* client_inc is always 1 */ 489}
354 req->r_request_osdmap_epoch = p;
355 p += 4;
356 req->r_request_flags = p;
357 p += 4;
358 if (req->r_flags & CEPH_OSD_FLAG_WRITE)
359 ceph_encode_timespec(p, mtime);
360 p += sizeof(struct ceph_timespec);
361 req->r_request_reassert_version = p;
362 p += sizeof(struct ceph_eversion); /* will get filled in */
363 490
364 /* oloc */ 491void osd_req_op_init(struct ceph_osd_request *osd_req,
365 ceph_encode_8(&p, 4); 492 unsigned int which, u16 opcode)
366 ceph_encode_8(&p, 4); 493{
367 ceph_encode_32(&p, 8 + 4 + 4); 494 (void)_osd_req_op_init(osd_req, which, opcode);
368 req->r_request_pool = p; 495}
369 p += 8; 496EXPORT_SYMBOL(osd_req_op_init);
370 ceph_encode_32(&p, -1); /* preferred */
371 ceph_encode_32(&p, 0); /* key len */
372 497
373 ceph_encode_8(&p, 1); 498void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
374 req->r_request_pgid = p; 499 unsigned int which, u16 opcode,
375 p += 8 + 4; 500 u64 offset, u64 length,
376 ceph_encode_32(&p, -1); /* preferred */ 501 u64 truncate_size, u32 truncate_seq)
502{
503 struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode);
504 size_t payload_len = 0;
377 505
378 /* oid */ 506 BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE);
379 ceph_encode_32(&p, req->r_oid_len);
380 memcpy(p, req->r_oid, req->r_oid_len);
381 dout("oid '%.*s' len %d\n", req->r_oid_len, req->r_oid, req->r_oid_len);
382 p += req->r_oid_len;
383 507
384 /* ops */ 508 op->extent.offset = offset;
385 ceph_encode_16(&p, num_ops); 509 op->extent.length = length;
386 src_op = src_ops; 510 op->extent.truncate_size = truncate_size;
387 req->r_request_ops = p; 511 op->extent.truncate_seq = truncate_seq;
388 for (i = 0; i < num_ops; i++, src_op++) { 512 if (opcode == CEPH_OSD_OP_WRITE)
389 osd_req_encode_op(req, p, src_op); 513 payload_len += length;
390 p += sizeof(struct ceph_osd_op);
391 }
392 514
393 /* snaps */ 515 op->payload_len = payload_len;
394 ceph_encode_64(&p, req->r_snapid); 516}
395 ceph_encode_64(&p, req->r_snapc ? req->r_snapc->seq : 0); 517EXPORT_SYMBOL(osd_req_op_extent_init);
396 ceph_encode_32(&p, req->r_snapc ? req->r_snapc->num_snaps : 0); 518
397 if (req->r_snapc) { 519void osd_req_op_extent_update(struct ceph_osd_request *osd_req,
398 for (i = 0; i < snapc->num_snaps; i++) { 520 unsigned int which, u64 length)
399 ceph_encode_64(&p, req->r_snapc->snaps[i]); 521{
400 } 522 struct ceph_osd_req_op *op;
523 u64 previous;
524
525 BUG_ON(which >= osd_req->r_num_ops);
526 op = &osd_req->r_ops[which];
527 previous = op->extent.length;
528
529 if (length == previous)
530 return; /* Nothing to do */
531 BUG_ON(length > previous);
532
533 op->extent.length = length;
534 op->payload_len -= previous - length;
535}
536EXPORT_SYMBOL(osd_req_op_extent_update);
537
538void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
539 u16 opcode, const char *class, const char *method)
540{
541 struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode);
542 struct ceph_pagelist *pagelist;
543 size_t payload_len = 0;
544 size_t size;
545
546 BUG_ON(opcode != CEPH_OSD_OP_CALL);
547
548 pagelist = kmalloc(sizeof (*pagelist), GFP_NOFS);
549 BUG_ON(!pagelist);
550 ceph_pagelist_init(pagelist);
551
552 op->cls.class_name = class;
553 size = strlen(class);
554 BUG_ON(size > (size_t) U8_MAX);
555 op->cls.class_len = size;
556 ceph_pagelist_append(pagelist, class, size);
557 payload_len += size;
558
559 op->cls.method_name = method;
560 size = strlen(method);
561 BUG_ON(size > (size_t) U8_MAX);
562 op->cls.method_len = size;
563 ceph_pagelist_append(pagelist, method, size);
564 payload_len += size;
565
566 osd_req_op_cls_request_info_pagelist(osd_req, which, pagelist);
567
568 op->cls.argc = 0; /* currently unused */
569
570 op->payload_len = payload_len;
571}
572EXPORT_SYMBOL(osd_req_op_cls_init);
573
574void osd_req_op_watch_init(struct ceph_osd_request *osd_req,
575 unsigned int which, u16 opcode,
576 u64 cookie, u64 version, int flag)
577{
578 struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode);
579
580 BUG_ON(opcode != CEPH_OSD_OP_NOTIFY_ACK && opcode != CEPH_OSD_OP_WATCH);
581
582 op->watch.cookie = cookie;
583 op->watch.ver = version;
584 if (opcode == CEPH_OSD_OP_WATCH && flag)
585 op->watch.flag = (u8)1;
586}
587EXPORT_SYMBOL(osd_req_op_watch_init);
588
589static void ceph_osdc_msg_data_add(struct ceph_msg *msg,
590 struct ceph_osd_data *osd_data)
591{
592 u64 length = ceph_osd_data_length(osd_data);
593
594 if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES) {
595 BUG_ON(length > (u64) SIZE_MAX);
596 if (length)
597 ceph_msg_data_add_pages(msg, osd_data->pages,
598 length, osd_data->alignment);
599 } else if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGELIST) {
600 BUG_ON(!length);
601 ceph_msg_data_add_pagelist(msg, osd_data->pagelist);
602#ifdef CONFIG_BLOCK
603 } else if (osd_data->type == CEPH_OSD_DATA_TYPE_BIO) {
604 ceph_msg_data_add_bio(msg, osd_data->bio, length);
605#endif
606 } else {
607 BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_NONE);
401 } 608 }
609}
402 610
403 req->r_request_attempts = p; 611static u64 osd_req_encode_op(struct ceph_osd_request *req,
404 p += 4; 612 struct ceph_osd_op *dst, unsigned int which)
613{
614 struct ceph_osd_req_op *src;
615 struct ceph_osd_data *osd_data;
616 u64 request_data_len = 0;
617 u64 data_length;
405 618
406 data_len = req->r_trail.length; 619 BUG_ON(which >= req->r_num_ops);
407 if (flags & CEPH_OSD_FLAG_WRITE) { 620 src = &req->r_ops[which];
408 req->r_request->hdr.data_off = cpu_to_le16(off); 621 if (WARN_ON(!osd_req_opcode_valid(src->op))) {
409 data_len += len; 622 pr_err("unrecognized osd opcode %d\n", src->op);
623
624 return 0;
410 } 625 }
411 req->r_request->hdr.data_len = cpu_to_le32(data_len);
412 req->r_request->page_alignment = req->r_page_alignment;
413 626
414 BUG_ON(p > msg->front.iov_base + msg->front.iov_len); 627 switch (src->op) {
415 msg_size = p - msg->front.iov_base; 628 case CEPH_OSD_OP_STAT:
416 msg->front.iov_len = msg_size; 629 osd_data = &src->raw_data_in;
417 msg->hdr.front_len = cpu_to_le32(msg_size); 630 ceph_osdc_msg_data_add(req->r_reply, osd_data);
631 break;
632 case CEPH_OSD_OP_READ:
633 case CEPH_OSD_OP_WRITE:
634 if (src->op == CEPH_OSD_OP_WRITE)
635 request_data_len = src->extent.length;
636 dst->extent.offset = cpu_to_le64(src->extent.offset);
637 dst->extent.length = cpu_to_le64(src->extent.length);
638 dst->extent.truncate_size =
639 cpu_to_le64(src->extent.truncate_size);
640 dst->extent.truncate_seq =
641 cpu_to_le32(src->extent.truncate_seq);
642 osd_data = &src->extent.osd_data;
643 if (src->op == CEPH_OSD_OP_WRITE)
644 ceph_osdc_msg_data_add(req->r_request, osd_data);
645 else
646 ceph_osdc_msg_data_add(req->r_reply, osd_data);
647 break;
648 case CEPH_OSD_OP_CALL:
649 dst->cls.class_len = src->cls.class_len;
650 dst->cls.method_len = src->cls.method_len;
651 osd_data = &src->cls.request_info;
652 ceph_osdc_msg_data_add(req->r_request, osd_data);
653 BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_PAGELIST);
654 request_data_len = osd_data->pagelist->length;
655
656 osd_data = &src->cls.request_data;
657 data_length = ceph_osd_data_length(osd_data);
658 if (data_length) {
659 BUG_ON(osd_data->type == CEPH_OSD_DATA_TYPE_NONE);
660 dst->cls.indata_len = cpu_to_le32(data_length);
661 ceph_osdc_msg_data_add(req->r_request, osd_data);
662 src->payload_len += data_length;
663 request_data_len += data_length;
664 }
665 osd_data = &src->cls.response_data;
666 ceph_osdc_msg_data_add(req->r_reply, osd_data);
667 break;
668 case CEPH_OSD_OP_STARTSYNC:
669 break;
670 case CEPH_OSD_OP_NOTIFY_ACK:
671 case CEPH_OSD_OP_WATCH:
672 dst->watch.cookie = cpu_to_le64(src->watch.cookie);
673 dst->watch.ver = cpu_to_le64(src->watch.ver);
674 dst->watch.flag = src->watch.flag;
675 break;
676 default:
677 pr_err("unsupported osd opcode %s\n",
678 ceph_osd_op_name(src->op));
679 WARN_ON(1);
418 680
419 dout("build_request msg_size was %d num_ops %d\n", (int)msg_size, 681 return 0;
420 num_ops); 682 }
421 return; 683 dst->op = cpu_to_le16(src->op);
684 dst->payload_len = cpu_to_le32(src->payload_len);
685
686 return request_data_len;
422} 687}
423EXPORT_SYMBOL(ceph_osdc_build_request);
424 688
425/* 689/*
426 * build new request AND message, calculate layout, and adjust file 690 * build new request AND message, calculate layout, and adjust file
@@ -436,51 +700,63 @@ EXPORT_SYMBOL(ceph_osdc_build_request);
436struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, 700struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
437 struct ceph_file_layout *layout, 701 struct ceph_file_layout *layout,
438 struct ceph_vino vino, 702 struct ceph_vino vino,
439 u64 off, u64 *plen, 703 u64 off, u64 *plen, int num_ops,
440 int opcode, int flags, 704 int opcode, int flags,
441 struct ceph_snap_context *snapc, 705 struct ceph_snap_context *snapc,
442 int do_sync,
443 u32 truncate_seq, 706 u32 truncate_seq,
444 u64 truncate_size, 707 u64 truncate_size,
445 struct timespec *mtime, 708 bool use_mempool)
446 bool use_mempool,
447 int page_align)
448{ 709{
449 struct ceph_osd_req_op ops[2];
450 struct ceph_osd_request *req; 710 struct ceph_osd_request *req;
451 unsigned int num_op = 1; 711 u64 objnum = 0;
712 u64 objoff = 0;
713 u64 objlen = 0;
714 u32 object_size;
715 u64 object_base;
452 int r; 716 int r;
453 717
454 memset(&ops, 0, sizeof ops); 718 BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE);
455
456 ops[0].op = opcode;
457 ops[0].extent.truncate_seq = truncate_seq;
458 ops[0].extent.truncate_size = truncate_size;
459
460 if (do_sync) {
461 ops[1].op = CEPH_OSD_OP_STARTSYNC;
462 num_op++;
463 }
464 719
465 req = ceph_osdc_alloc_request(osdc, snapc, num_op, use_mempool, 720 req = ceph_osdc_alloc_request(osdc, snapc, num_ops, use_mempool,
466 GFP_NOFS); 721 GFP_NOFS);
467 if (!req) 722 if (!req)
468 return ERR_PTR(-ENOMEM); 723 return ERR_PTR(-ENOMEM);
724
469 req->r_flags = flags; 725 req->r_flags = flags;
470 726
471 /* calculate max write size */ 727 /* calculate max write size */
472 r = calc_layout(vino, layout, off, plen, req, ops); 728 r = calc_layout(layout, off, plen, &objnum, &objoff, &objlen);
473 if (r < 0) 729 if (r < 0) {
730 ceph_osdc_put_request(req);
474 return ERR_PTR(r); 731 return ERR_PTR(r);
475 req->r_file_layout = *layout; /* keep a copy */ 732 }
476 733
477 /* in case it differs from natural (file) alignment that 734 object_size = le32_to_cpu(layout->fl_object_size);
478 calc_layout filled in for us */ 735 object_base = off - objoff;
479 req->r_num_pages = calc_pages_for(page_align, *plen); 736 if (truncate_size <= object_base) {
480 req->r_page_alignment = page_align; 737 truncate_size = 0;
738 } else {
739 truncate_size -= object_base;
740 if (truncate_size > object_size)
741 truncate_size = object_size;
742 }
743
744 osd_req_op_extent_init(req, 0, opcode, objoff, objlen,
745 truncate_size, truncate_seq);
746
747 /*
748 * A second op in the ops array means the caller wants to
749 * also issue a include a 'startsync' command so that the
750 * osd will flush data quickly.
751 */
752 if (num_ops > 1)
753 osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC);
754
755 req->r_file_layout = *layout; /* keep a copy */
481 756
482 ceph_osdc_build_request(req, off, *plen, num_op, ops, 757 snprintf(req->r_oid, sizeof(req->r_oid), "%llx.%08llx",
483 snapc, vino.snap, mtime); 758 vino.ino, objnum);
759 req->r_oid_len = strlen(req->r_oid);
484 760
485 return req; 761 return req;
486} 762}
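
With the per-op interfaces above, a caller now states the op count when allocating a request, initializes each op (osd_req_op_init(), osd_req_op_extent_init(), osd_req_op_cls_init(), ...), and attaches data buffers per op instead of filling the old r_pages fields on the request. Note also the truncate adjustment in ceph_osdc_new_request(): the file-relative truncate_size is shifted into object space, e.g. with 4 MB objects, off = 6 MB maps to objoff = 2 MB, so object_base = 4 MB and a file truncate_size of 7 MB becomes 3 MB within the object. Below is an illustrative single-op read, not part of the patch; the synchronous submit/wait calls, the zero page alignment, and the ceph_osdc_build_request() call form are assumptions about the surrounding osd client API:

/*
 * Sketch: read one object extent using the reworked request setup.
 * ceph_osdc_build_request() is reworked elsewhere in this patch; the
 * call below assumes it still takes the request, file offset, snap
 * context, snap id and mtime.
 */
static int example_read_extent(struct ceph_osd_client *osdc,
			       struct ceph_file_layout *layout,
			       struct ceph_vino vino, u64 off, u64 len,
			       struct page **pages)
{
	struct ceph_osd_request *req;
	int ret;

	req = ceph_osdc_new_request(osdc, layout, vino, off, &len,
				    1, CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
				    NULL, 0, 0, false);
	if (IS_ERR(req))
		return PTR_ERR(req);

	/* hand op 0 the destination pages (assumed page aligned) */
	osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0,
					 false, false);

	ceph_osdc_build_request(req, off, NULL, vino.snap, NULL);

	ret = ceph_osdc_start_request(osdc, req, false);
	if (!ret)
		ret = ceph_osdc_wait_request(osdc, req);
	ceph_osdc_put_request(req);

	return ret;
}
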
@@ -558,21 +834,46 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc,
558 struct ceph_osd *osd) 834 struct ceph_osd *osd)
559{ 835{
560 struct ceph_osd_request *req, *nreq; 836 struct ceph_osd_request *req, *nreq;
837 LIST_HEAD(resend);
561 int err; 838 int err;
562 839
563 dout("__kick_osd_requests osd%d\n", osd->o_osd); 840 dout("__kick_osd_requests osd%d\n", osd->o_osd);
564 err = __reset_osd(osdc, osd); 841 err = __reset_osd(osdc, osd);
565 if (err) 842 if (err)
566 return; 843 return;
567 844 /*
845 * Build up a list of requests to resend by traversing the
846 * osd's list of requests. Requests for a given object are
847 * sent in tid order, and that is also the order they're
848 * kept on this list. Therefore all requests that are in
849 * flight will be found first, followed by all requests that
850 * have not yet been sent. And to resend requests while
851 * preserving this order we will want to put any sent
852 * requests back on the front of the osd client's unsent
853 * list.
854 *
855 * So we build a separate ordered list of already-sent
856 * requests for the affected osd and splice it onto the
857 * front of the osd client's unsent list. Once we've seen a
858 * request that has not yet been sent we're done. Those
859 * requests are already sitting right where they belong.
860 */
568 list_for_each_entry(req, &osd->o_requests, r_osd_item) { 861 list_for_each_entry(req, &osd->o_requests, r_osd_item) {
569 list_move(&req->r_req_lru_item, &osdc->req_unsent); 862 if (!req->r_sent)
570 dout("requeued %p tid %llu osd%d\n", req, req->r_tid, 863 break;
864 list_move_tail(&req->r_req_lru_item, &resend);
865 dout("requeueing %p tid %llu osd%d\n", req, req->r_tid,
571 osd->o_osd); 866 osd->o_osd);
572 if (!req->r_linger) 867 if (!req->r_linger)
573 req->r_flags |= CEPH_OSD_FLAG_RETRY; 868 req->r_flags |= CEPH_OSD_FLAG_RETRY;
574 } 869 }
870 list_splice(&resend, &osdc->req_unsent);
575 871
872 /*
873 * Linger requests are re-registered before sending, which
874 * sets up a new tid for each. We add them to the unsent
875 * list at the end to keep things in tid order.
876 */
576 list_for_each_entry_safe(req, nreq, &osd->o_linger_requests, 877 list_for_each_entry_safe(req, nreq, &osd->o_linger_requests,
577 r_linger_osd) { 878 r_linger_osd) {
578 /* 879 /*
@@ -581,8 +882,8 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc,
581 */ 882 */
582 BUG_ON(!list_empty(&req->r_req_lru_item)); 883 BUG_ON(!list_empty(&req->r_req_lru_item));
583 __register_request(osdc, req); 884 __register_request(osdc, req);
584 list_add(&req->r_req_lru_item, &osdc->req_unsent); 885 list_add_tail(&req->r_req_lru_item, &osdc->req_unsent);
585 list_add(&req->r_osd_item, &req->r_osd->o_requests); 886 list_add_tail(&req->r_osd_item, &req->r_osd->o_requests);
586 __unregister_linger_request(osdc, req); 887 __unregister_linger_request(osdc, req);
587 dout("requeued lingering %p tid %llu osd%d\n", req, req->r_tid, 888 dout("requeued lingering %p tid %llu osd%d\n", req, req->r_tid,
588 osd->o_osd); 889 osd->o_osd);
@@ -654,8 +955,7 @@ static void put_osd(struct ceph_osd *osd)
654 if (atomic_dec_and_test(&osd->o_ref) && osd->o_auth.authorizer) { 955 if (atomic_dec_and_test(&osd->o_ref) && osd->o_auth.authorizer) {
655 struct ceph_auth_client *ac = osd->o_osdc->client->monc.auth; 956 struct ceph_auth_client *ac = osd->o_osdc->client->monc.auth;
656 957
657 if (ac->ops && ac->ops->destroy_authorizer) 958 ceph_auth_destroy_authorizer(ac, osd->o_auth.authorizer);
658 ac->ops->destroy_authorizer(ac, osd->o_auth.authorizer);
659 kfree(osd); 959 kfree(osd);
660 } 960 }
661} 961}
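
The open-coded "does this auth backend provide a destroy hook?" test is replaced by a ceph_auth_destroy_authorizer() wrapper. The wrapper itself lives in net/ceph/auth.c and is not shown in this hunk; assuming it simply centralizes the NULL checks (it may also add locking), its shape would be roughly:

/* Hedged sketch of the helper called above; the real body is in
 * net/ceph/auth.c, which this hunk does not show. */
void ceph_auth_destroy_authorizer(struct ceph_auth_client *ac,
                                  struct ceph_authorizer *a)
{
        if (ac->ops && ac->ops->destroy_authorizer)
                ac->ops->destroy_authorizer(ac, a);
}
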
@@ -820,14 +1120,6 @@ static void __register_request(struct ceph_osd_client *osdc,
820 } 1120 }
821} 1121}
822 1122
823static void register_request(struct ceph_osd_client *osdc,
824 struct ceph_osd_request *req)
825{
826 mutex_lock(&osdc->request_mutex);
827 __register_request(osdc, req);
828 mutex_unlock(&osdc->request_mutex);
829}
830
831/* 1123/*
832 * called under osdc->request_mutex 1124 * called under osdc->request_mutex
833 */ 1125 */
@@ -952,8 +1244,8 @@ static int __map_request(struct ceph_osd_client *osdc,
952 int err; 1244 int err;
953 1245
954 dout("map_request %p tid %lld\n", req, req->r_tid); 1246 dout("map_request %p tid %lld\n", req, req->r_tid);
955 err = ceph_calc_object_layout(&pgid, req->r_oid, 1247 err = ceph_calc_ceph_pg(&pgid, req->r_oid, osdc->osdmap,
956 &req->r_file_layout, osdc->osdmap); 1248 ceph_file_layout_pg_pool(req->r_file_layout));
957 if (err) { 1249 if (err) {
958 list_move(&req->r_req_lru_item, &osdc->req_notarget); 1250 list_move(&req->r_req_lru_item, &osdc->req_notarget);
959 return err; 1251 return err;
@@ -1007,10 +1299,10 @@ static int __map_request(struct ceph_osd_client *osdc,
1007 1299
1008 if (req->r_osd) { 1300 if (req->r_osd) {
1009 __remove_osd_from_lru(req->r_osd); 1301 __remove_osd_from_lru(req->r_osd);
1010 list_add(&req->r_osd_item, &req->r_osd->o_requests); 1302 list_add_tail(&req->r_osd_item, &req->r_osd->o_requests);
1011 list_move(&req->r_req_lru_item, &osdc->req_unsent); 1303 list_move_tail(&req->r_req_lru_item, &osdc->req_unsent);
1012 } else { 1304 } else {
1013 list_move(&req->r_req_lru_item, &osdc->req_notarget); 1305 list_move_tail(&req->r_req_lru_item, &osdc->req_notarget);
1014 } 1306 }
1015 err = 1; /* osd or pg changed */ 1307 err = 1; /* osd or pg changed */
1016 1308
@@ -1045,8 +1337,14 @@ static void __send_request(struct ceph_osd_client *osdc,
1045 list_move_tail(&req->r_req_lru_item, &osdc->req_lru); 1337 list_move_tail(&req->r_req_lru_item, &osdc->req_lru);
1046 1338
1047 ceph_msg_get(req->r_request); /* send consumes a ref */ 1339 ceph_msg_get(req->r_request); /* send consumes a ref */
1048 ceph_con_send(&req->r_osd->o_con, req->r_request); 1340
 1341 /* Mark the request unsafe if this is the first time it's being sent. */
1342
1343 if (!req->r_sent && req->r_unsafe_callback)
1344 req->r_unsafe_callback(req, true);
1049 req->r_sent = req->r_osd->o_incarnation; 1345 req->r_sent = req->r_osd->o_incarnation;
1346
1347 ceph_con_send(&req->r_osd->o_con, req->r_request);
1050} 1348}
1051 1349
1052/* 1350/*
@@ -1134,31 +1432,11 @@ static void handle_osds_timeout(struct work_struct *work)
1134 1432
1135static void complete_request(struct ceph_osd_request *req) 1433static void complete_request(struct ceph_osd_request *req)
1136{ 1434{
1137 if (req->r_safe_callback) 1435 if (req->r_unsafe_callback)
1138 req->r_safe_callback(req, NULL); 1436 req->r_unsafe_callback(req, false);
1139 complete_all(&req->r_safe_completion); /* fsync waiter */ 1437 complete_all(&req->r_safe_completion); /* fsync waiter */
1140} 1438}
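
complete_request() now drives the same r_unsafe_callback that __send_request() invoked with 'true' on first send, this time with 'false'. A hedged sketch of a caller-supplied callback matching the signature implied by those two call sites (the counter and wait queue are hypothetical caller state, not part of libceph):

#include <linux/atomic.h>
#include <linux/wait.h>
#include <linux/ceph/osd_client.h>

static atomic_t unsafe_in_flight = ATOMIC_INIT(0);
static DECLARE_WAIT_QUEUE_HEAD(unsafe_wq);

/* Called with unsafe=true when the request is first sent, and with
 * unsafe=false when the reply completes it. */
static void example_unsafe_callback(struct ceph_osd_request *req, bool unsafe)
{
        if (unsafe)
                atomic_inc(&unsafe_in_flight);
        else if (atomic_dec_and_test(&unsafe_in_flight))
                wake_up_all(&unsafe_wq);
}
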
1141 1439
1142static int __decode_pgid(void **p, void *end, struct ceph_pg *pgid)
1143{
1144 __u8 v;
1145
1146 ceph_decode_need(p, end, 1 + 8 + 4 + 4, bad);
1147 v = ceph_decode_8(p);
1148 if (v > 1) {
1149 pr_warning("do not understand pg encoding %d > 1", v);
1150 return -EINVAL;
1151 }
1152 pgid->pool = ceph_decode_64(p);
1153 pgid->seed = ceph_decode_32(p);
1154 *p += 4;
1155 return 0;
1156
1157bad:
1158 pr_warning("incomplete pg encoding");
1159 return -EINVAL;
1160}
1161
1162/* 1440/*
1163 * handle osd op reply. either call the callback if it is specified, 1441 * handle osd op reply. either call the callback if it is specified,
1164 * or do the completion to wake up the waiting thread. 1442 * or do the completion to wake up the waiting thread.
@@ -1170,7 +1448,8 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
1170 struct ceph_osd_request *req; 1448 struct ceph_osd_request *req;
1171 u64 tid; 1449 u64 tid;
1172 int object_len; 1450 int object_len;
1173 int numops, payload_len, flags; 1451 unsigned int numops;
1452 int payload_len, flags;
1174 s32 result; 1453 s32 result;
1175 s32 retry_attempt; 1454 s32 retry_attempt;
1176 struct ceph_pg pg; 1455 struct ceph_pg pg;
@@ -1178,7 +1457,9 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
1178 u32 reassert_epoch; 1457 u32 reassert_epoch;
1179 u64 reassert_version; 1458 u64 reassert_version;
1180 u32 osdmap_epoch; 1459 u32 osdmap_epoch;
1181 int i; 1460 int already_completed;
1461 u32 bytes;
1462 unsigned int i;
1182 1463
1183 tid = le64_to_cpu(msg->hdr.tid); 1464 tid = le64_to_cpu(msg->hdr.tid);
1184 dout("handle_reply %p tid %llu\n", msg, tid); 1465 dout("handle_reply %p tid %llu\n", msg, tid);
@@ -1191,7 +1472,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
1191 ceph_decode_need(&p, end, object_len, bad); 1472 ceph_decode_need(&p, end, object_len, bad);
1192 p += object_len; 1473 p += object_len;
1193 1474
1194 err = __decode_pgid(&p, end, &pg); 1475 err = ceph_decode_pgid(&p, end, &pg);
1195 if (err) 1476 if (err)
1196 goto bad; 1477 goto bad;
1197 1478
@@ -1207,8 +1488,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
1207 req = __lookup_request(osdc, tid); 1488 req = __lookup_request(osdc, tid);
1208 if (req == NULL) { 1489 if (req == NULL) {
1209 dout("handle_reply tid %llu dne\n", tid); 1490 dout("handle_reply tid %llu dne\n", tid);
1210 mutex_unlock(&osdc->request_mutex); 1491 goto bad_mutex;
1211 return;
1212 } 1492 }
1213 ceph_osdc_get_request(req); 1493 ceph_osdc_get_request(req);
1214 1494
@@ -1233,9 +1513,10 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
1233 payload_len += len; 1513 payload_len += len;
1234 p += sizeof(*op); 1514 p += sizeof(*op);
1235 } 1515 }
1236 if (payload_len != le32_to_cpu(msg->hdr.data_len)) { 1516 bytes = le32_to_cpu(msg->hdr.data_len);
1517 if (payload_len != bytes) {
1237 pr_warning("sum of op payload lens %d != data_len %d", 1518 pr_warning("sum of op payload lens %d != data_len %d",
1238 payload_len, le32_to_cpu(msg->hdr.data_len)); 1519 payload_len, bytes);
1239 goto bad_put; 1520 goto bad_put;
1240 } 1521 }
1241 1522
@@ -1244,21 +1525,9 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
1244 for (i = 0; i < numops; i++) 1525 for (i = 0; i < numops; i++)
1245 req->r_reply_op_result[i] = ceph_decode_32(&p); 1526 req->r_reply_op_result[i] = ceph_decode_32(&p);
1246 1527
1247 /*
1248 * if this connection filled our message, drop our reference now, to
1249 * avoid a (safe but slower) revoke later.
1250 */
1251 if (req->r_con_filling_msg == con && req->r_reply == msg) {
1252 dout(" dropping con_filling_msg ref %p\n", con);
1253 req->r_con_filling_msg = NULL;
1254 con->ops->put(con);
1255 }
1256
1257 if (!req->r_got_reply) { 1528 if (!req->r_got_reply) {
1258 unsigned int bytes;
1259 1529
1260 req->r_result = result; 1530 req->r_result = result;
1261 bytes = le32_to_cpu(msg->hdr.data_len);
1262 dout("handle_reply result %d bytes %d\n", req->r_result, 1531 dout("handle_reply result %d bytes %d\n", req->r_result,
1263 bytes); 1532 bytes);
1264 if (req->r_result == 0) 1533 if (req->r_result == 0)
@@ -1286,7 +1555,11 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
1286 ((flags & CEPH_OSD_FLAG_WRITE) == 0)) 1555 ((flags & CEPH_OSD_FLAG_WRITE) == 0))
1287 __unregister_request(osdc, req); 1556 __unregister_request(osdc, req);
1288 1557
1558 already_completed = req->r_completed;
1559 req->r_completed = 1;
1289 mutex_unlock(&osdc->request_mutex); 1560 mutex_unlock(&osdc->request_mutex);
1561 if (already_completed)
1562 goto done;
1290 1563
1291 if (req->r_callback) 1564 if (req->r_callback)
1292 req->r_callback(req, msg); 1565 req->r_callback(req, msg);
@@ -1303,6 +1576,8 @@ done:
1303 1576
1304bad_put: 1577bad_put:
1305 ceph_osdc_put_request(req); 1578 ceph_osdc_put_request(req);
1579bad_mutex:
1580 mutex_unlock(&osdc->request_mutex);
1306bad: 1581bad:
1307 pr_err("corrupt osd_op_reply got %d %d\n", 1582 pr_err("corrupt osd_op_reply got %d %d\n",
1308 (int)msg->front.iov_len, le32_to_cpu(msg->hdr.front_len)); 1583 (int)msg->front.iov_len, le32_to_cpu(msg->hdr.front_len));
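
The new already_completed/r_completed handling above makes handle_reply() idempotent: the flag is tested and set under request_mutex, and the callback plus the fsync completion run only for the first reply. In isolation the pattern looks like this (all names hypothetical):

#include <linux/mutex.h>
#include <linux/completion.h>

struct example_req {
        bool completed;
        void (*callback)(struct example_req *);
        struct completion done;
};

static DEFINE_MUTEX(example_request_mutex);

static void example_complete_once(struct example_req *req)
{
        bool already;

        mutex_lock(&example_request_mutex);
        already = req->completed;
        req->completed = true;
        mutex_unlock(&example_request_mutex);

        if (already)
                return;                 /* duplicate reply: already handled */

        if (req->callback)
                req->callback(req);     /* run the callback outside the lock */
        complete_all(&req->done);
}
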
@@ -1736,6 +2011,104 @@ bad:
1736} 2011}
1737 2012
1738/* 2013/*
2014 * build new request AND message
2015 *
2016 */
2017void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off,
2018 struct ceph_snap_context *snapc, u64 snap_id,
2019 struct timespec *mtime)
2020{
2021 struct ceph_msg *msg = req->r_request;
2022 void *p;
2023 size_t msg_size;
2024 int flags = req->r_flags;
2025 u64 data_len;
2026 unsigned int i;
2027
2028 req->r_snapid = snap_id;
2029 req->r_snapc = ceph_get_snap_context(snapc);
2030
2031 /* encode request */
2032 msg->hdr.version = cpu_to_le16(4);
2033
2034 p = msg->front.iov_base;
2035 ceph_encode_32(&p, 1); /* client_inc is always 1 */
2036 req->r_request_osdmap_epoch = p;
2037 p += 4;
2038 req->r_request_flags = p;
2039 p += 4;
2040 if (req->r_flags & CEPH_OSD_FLAG_WRITE)
2041 ceph_encode_timespec(p, mtime);
2042 p += sizeof(struct ceph_timespec);
2043 req->r_request_reassert_version = p;
2044 p += sizeof(struct ceph_eversion); /* will get filled in */
2045
2046 /* oloc */
2047 ceph_encode_8(&p, 4);
2048 ceph_encode_8(&p, 4);
2049 ceph_encode_32(&p, 8 + 4 + 4);
2050 req->r_request_pool = p;
2051 p += 8;
2052 ceph_encode_32(&p, -1); /* preferred */
2053 ceph_encode_32(&p, 0); /* key len */
2054
2055 ceph_encode_8(&p, 1);
2056 req->r_request_pgid = p;
2057 p += 8 + 4;
2058 ceph_encode_32(&p, -1); /* preferred */
2059
2060 /* oid */
2061 ceph_encode_32(&p, req->r_oid_len);
2062 memcpy(p, req->r_oid, req->r_oid_len);
2063 dout("oid '%.*s' len %d\n", req->r_oid_len, req->r_oid, req->r_oid_len);
2064 p += req->r_oid_len;
2065
2066 /* ops--can imply data */
2067 ceph_encode_16(&p, (u16)req->r_num_ops);
2068 data_len = 0;
2069 for (i = 0; i < req->r_num_ops; i++) {
2070 data_len += osd_req_encode_op(req, p, i);
2071 p += sizeof(struct ceph_osd_op);
2072 }
2073
2074 /* snaps */
2075 ceph_encode_64(&p, req->r_snapid);
2076 ceph_encode_64(&p, req->r_snapc ? req->r_snapc->seq : 0);
2077 ceph_encode_32(&p, req->r_snapc ? req->r_snapc->num_snaps : 0);
2078 if (req->r_snapc) {
2079 for (i = 0; i < snapc->num_snaps; i++) {
2080 ceph_encode_64(&p, req->r_snapc->snaps[i]);
2081 }
2082 }
2083
2084 req->r_request_attempts = p;
2085 p += 4;
2086
2087 /* data */
2088 if (flags & CEPH_OSD_FLAG_WRITE) {
2089 u16 data_off;
2090
2091 /*
2092 * The header "data_off" is a hint to the receiver
2093 * allowing it to align received data into its
2094 * buffers such that there's no need to re-copy
2095 * it before writing it to disk (direct I/O).
2096 */
2097 data_off = (u16) (off & 0xffff);
2098 req->r_request->hdr.data_off = cpu_to_le16(data_off);
2099 }
2100 req->r_request->hdr.data_len = cpu_to_le32(data_len);
2101
2102 BUG_ON(p > msg->front.iov_base + msg->front.iov_len);
2103 msg_size = p - msg->front.iov_base;
2104 msg->front.iov_len = msg_size;
2105 msg->hdr.front_len = cpu_to_le32(msg_size);
2106
2107 dout("build_request msg_size was %d\n", (int)msg_size);
2108}
2109EXPORT_SYMBOL(ceph_osdc_build_request);
2110
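
With ceph_osdc_build_request() now separate from request allocation, a typical read path strings the helpers together the way ceph_osdc_readpages() does further down in this patch. A trimmed sketch (error handling and page allocation omitted; the wait/put calls follow the usual libceph pattern):

static int example_object_read(struct ceph_osd_client *osdc,
                               struct ceph_file_layout *layout,
                               struct ceph_vino vino, u64 off, u64 len,
                               struct page **pages, int page_align)
{
        struct ceph_osd_request *req;
        int ret;

        /* one extent op, no snap context, no truncation */
        req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 1,
                                    CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
                                    NULL, 0, 0, false);
        if (IS_ERR(req))
                return PTR_ERR(req);

        /* attach the page vector to op 0, then encode the message */
        osd_req_op_extent_osd_data_pages(req, 0, pages, len, page_align,
                                         false, false);
        ceph_osdc_build_request(req, off, NULL, vino.snap, NULL);

        ret = ceph_osdc_start_request(osdc, req, false);
        if (!ret)
                ret = ceph_osdc_wait_request(osdc, req);
        ceph_osdc_put_request(req);
        return ret;
}
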
2111/*
1739 * Register request, send initial attempt. 2112 * Register request, send initial attempt.
1740 */ 2113 */
1741int ceph_osdc_start_request(struct ceph_osd_client *osdc, 2114int ceph_osdc_start_request(struct ceph_osd_client *osdc,
@@ -1744,41 +2117,26 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
1744{ 2117{
1745 int rc = 0; 2118 int rc = 0;
1746 2119
1747 req->r_request->pages = req->r_pages;
1748 req->r_request->nr_pages = req->r_num_pages;
1749#ifdef CONFIG_BLOCK
1750 req->r_request->bio = req->r_bio;
1751#endif
1752 req->r_request->trail = &req->r_trail;
1753
1754 register_request(osdc, req);
1755
1756 down_read(&osdc->map_sem); 2120 down_read(&osdc->map_sem);
1757 mutex_lock(&osdc->request_mutex); 2121 mutex_lock(&osdc->request_mutex);
1758 /* 2122 __register_request(osdc, req);
1759 * a racing kick_requests() may have sent the message for us 2123 WARN_ON(req->r_sent);
1760 * while we dropped request_mutex above, so only send now if 2124 rc = __map_request(osdc, req, 0);
1761 * the request still hasn't been touched yet. 2125 if (rc < 0) {
1762 */ 2126 if (nofail) {
1763 if (req->r_sent == 0) { 2127 dout("osdc_start_request failed map, "
1764 rc = __map_request(osdc, req, 0); 2128 " will retry %lld\n", req->r_tid);
1765 if (rc < 0) { 2129 rc = 0;
1766 if (nofail) {
1767 dout("osdc_start_request failed map, "
1768 " will retry %lld\n", req->r_tid);
1769 rc = 0;
1770 }
1771 goto out_unlock;
1772 }
1773 if (req->r_osd == NULL) {
1774 dout("send_request %p no up osds in pg\n", req);
1775 ceph_monc_request_next_osdmap(&osdc->client->monc);
1776 } else {
1777 __send_request(osdc, req);
1778 } 2130 }
1779 rc = 0; 2131 goto out_unlock;
1780 } 2132 }
1781 2133 if (req->r_osd == NULL) {
2134 dout("send_request %p no up osds in pg\n", req);
2135 ceph_monc_request_next_osdmap(&osdc->client->monc);
2136 } else {
2137 __send_queued(osdc);
2138 }
2139 rc = 0;
1782out_unlock: 2140out_unlock:
1783 mutex_unlock(&osdc->request_mutex); 2141 mutex_unlock(&osdc->request_mutex);
1784 up_read(&osdc->map_sem); 2142 up_read(&osdc->map_sem);
@@ -1940,18 +2298,22 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc,
1940 2298
1941 dout("readpages on ino %llx.%llx on %llu~%llu\n", vino.ino, 2299 dout("readpages on ino %llx.%llx on %llu~%llu\n", vino.ino,
1942 vino.snap, off, *plen); 2300 vino.snap, off, *plen);
1943 req = ceph_osdc_new_request(osdc, layout, vino, off, plen, 2301 req = ceph_osdc_new_request(osdc, layout, vino, off, plen, 1,
1944 CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, 2302 CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
1945 NULL, 0, truncate_seq, truncate_size, NULL, 2303 NULL, truncate_seq, truncate_size,
1946 false, page_align); 2304 false);
1947 if (IS_ERR(req)) 2305 if (IS_ERR(req))
1948 return PTR_ERR(req); 2306 return PTR_ERR(req);
1949 2307
1950 /* it may be a short read due to an object boundary */ 2308 /* it may be a short read due to an object boundary */
1951 req->r_pages = pages;
1952 2309
1953 dout("readpages final extent is %llu~%llu (%d pages align %d)\n", 2310 osd_req_op_extent_osd_data_pages(req, 0,
1954 off, *plen, req->r_num_pages, page_align); 2311 pages, *plen, page_align, false, false);
2312
2313 dout("readpages final extent is %llu~%llu (%llu bytes align %d)\n",
2314 off, *plen, *plen, page_align);
2315
2316 ceph_osdc_build_request(req, off, NULL, vino.snap, NULL);
1955 2317
1956 rc = ceph_osdc_start_request(osdc, req, false); 2318 rc = ceph_osdc_start_request(osdc, req, false);
1957 if (!rc) 2319 if (!rc)
@@ -1978,20 +2340,21 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
1978 int rc = 0; 2340 int rc = 0;
1979 int page_align = off & ~PAGE_MASK; 2341 int page_align = off & ~PAGE_MASK;
1980 2342
1981 BUG_ON(vino.snap != CEPH_NOSNAP); 2343 BUG_ON(vino.snap != CEPH_NOSNAP); /* snapshots aren't writeable */
1982 req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 2344 req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 1,
1983 CEPH_OSD_OP_WRITE, 2345 CEPH_OSD_OP_WRITE,
1984 CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE, 2346 CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
1985 snapc, 0, 2347 snapc, truncate_seq, truncate_size,
1986 truncate_seq, truncate_size, mtime, 2348 true);
1987 true, page_align);
1988 if (IS_ERR(req)) 2349 if (IS_ERR(req))
1989 return PTR_ERR(req); 2350 return PTR_ERR(req);
1990 2351
1991 /* it may be a short write due to an object boundary */ 2352 /* it may be a short write due to an object boundary */
1992 req->r_pages = pages; 2353 osd_req_op_extent_osd_data_pages(req, 0, pages, len, page_align,
1993 dout("writepages %llu~%llu (%d pages)\n", off, len, 2354 false, false);
1994 req->r_num_pages); 2355 dout("writepages %llu~%llu (%llu bytes)\n", off, len, len);
2356
2357 ceph_osdc_build_request(req, off, snapc, CEPH_NOSNAP, mtime);
1995 2358
1996 rc = ceph_osdc_start_request(osdc, req, true); 2359 rc = ceph_osdc_start_request(osdc, req, true);
1997 if (!rc) 2360 if (!rc)
@@ -2005,6 +2368,26 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
2005} 2368}
2006EXPORT_SYMBOL(ceph_osdc_writepages); 2369EXPORT_SYMBOL(ceph_osdc_writepages);
2007 2370
2371int ceph_osdc_setup(void)
2372{
2373 BUG_ON(ceph_osd_request_cache);
2374 ceph_osd_request_cache = kmem_cache_create("ceph_osd_request",
2375 sizeof (struct ceph_osd_request),
2376 __alignof__(struct ceph_osd_request),
2377 0, NULL);
2378
2379 return ceph_osd_request_cache ? 0 : -ENOMEM;
2380}
2381EXPORT_SYMBOL(ceph_osdc_setup);
2382
2383void ceph_osdc_cleanup(void)
2384{
2385 BUG_ON(!ceph_osd_request_cache);
2386 kmem_cache_destroy(ceph_osd_request_cache);
2387 ceph_osd_request_cache = NULL;
2388}
2389EXPORT_SYMBOL(ceph_osdc_cleanup);
2390
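
ceph_osdc_setup()/ceph_osdc_cleanup() create and destroy the ceph_osd_request slab once per module; the real call site is in libceph's module init/exit (ceph_common.c, not part of this hunk). A hedged sketch of how the pair is meant to be used:

static int __init example_init(void)
{
        int ret;

        ret = ceph_osdc_setup();        /* create the ceph_osd_request cache */
        if (ret)
                return ret;
        /* ... remaining init ... */
        return 0;
}

static void __exit example_exit(void)
{
        /* ... remaining teardown ... */
        ceph_osdc_cleanup();            /* destroy the cache */
}
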
2008/* 2391/*
2009 * handle incoming message 2392 * handle incoming message
2010 */ 2393 */
@@ -2064,13 +2447,10 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
2064 goto out; 2447 goto out;
2065 } 2448 }
2066 2449
2067 if (req->r_con_filling_msg) { 2450 if (req->r_reply->con)
2068 dout("%s revoking msg %p from old con %p\n", __func__, 2451 dout("%s revoking msg %p from old con %p\n", __func__,
2069 req->r_reply, req->r_con_filling_msg); 2452 req->r_reply, req->r_reply->con);
2070 ceph_msg_revoke_incoming(req->r_reply); 2453 ceph_msg_revoke_incoming(req->r_reply);
2071 req->r_con_filling_msg->ops->put(req->r_con_filling_msg);
2072 req->r_con_filling_msg = NULL;
2073 }
2074 2454
2075 if (front > req->r_reply->front.iov_len) { 2455 if (front > req->r_reply->front.iov_len) {
2076 pr_warning("get_reply front %d > preallocated %d\n", 2456 pr_warning("get_reply front %d > preallocated %d\n",
@@ -2084,26 +2464,29 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
2084 m = ceph_msg_get(req->r_reply); 2464 m = ceph_msg_get(req->r_reply);
2085 2465
2086 if (data_len > 0) { 2466 if (data_len > 0) {
2087 int want = calc_pages_for(req->r_page_alignment, data_len); 2467 struct ceph_osd_data *osd_data;
2088 2468
2089 if (req->r_pages && unlikely(req->r_num_pages < want)) { 2469 /*
2090 pr_warning("tid %lld reply has %d bytes %d pages, we" 2470 * XXX This is assuming there is only one op containing
2091 " had only %d pages ready\n", tid, data_len, 2471 * XXX page data. Probably OK for reads, but this
2092 want, req->r_num_pages); 2472 * XXX ought to be done more generally.
2093 *skip = 1; 2473 */
2094 ceph_msg_put(m); 2474 osd_data = osd_req_op_extent_osd_data(req, 0);
2095 m = NULL; 2475 if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES) {
2096 goto out; 2476 if (osd_data->pages &&
2477 unlikely(osd_data->length < data_len)) {
2478
2479 pr_warning("tid %lld reply has %d bytes "
2480 "we had only %llu bytes ready\n",
2481 tid, data_len, osd_data->length);
2482 *skip = 1;
2483 ceph_msg_put(m);
2484 m = NULL;
2485 goto out;
2486 }
2097 } 2487 }
2098 m->pages = req->r_pages;
2099 m->nr_pages = req->r_num_pages;
2100 m->page_alignment = req->r_page_alignment;
2101#ifdef CONFIG_BLOCK
2102 m->bio = req->r_bio;
2103#endif
2104 } 2488 }
2105 *skip = 0; 2489 *skip = 0;
2106 req->r_con_filling_msg = con->ops->get(con);
2107 dout("get_reply tid %lld %p\n", tid, m); 2490 dout("get_reply tid %lld %p\n", tid, m);
2108 2491
2109out: 2492out:
@@ -2168,13 +2551,17 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
2168 struct ceph_auth_handshake *auth = &o->o_auth; 2551 struct ceph_auth_handshake *auth = &o->o_auth;
2169 2552
2170 if (force_new && auth->authorizer) { 2553 if (force_new && auth->authorizer) {
2171 if (ac->ops && ac->ops->destroy_authorizer) 2554 ceph_auth_destroy_authorizer(ac, auth->authorizer);
2172 ac->ops->destroy_authorizer(ac, auth->authorizer);
2173 auth->authorizer = NULL; 2555 auth->authorizer = NULL;
2174 } 2556 }
2175 if (!auth->authorizer && ac->ops && ac->ops->create_authorizer) { 2557 if (!auth->authorizer) {
2176 int ret = ac->ops->create_authorizer(ac, CEPH_ENTITY_TYPE_OSD, 2558 int ret = ceph_auth_create_authorizer(ac, CEPH_ENTITY_TYPE_OSD,
2177 auth); 2559 auth);
2560 if (ret)
2561 return ERR_PTR(ret);
2562 } else {
2563 int ret = ceph_auth_update_authorizer(ac, CEPH_ENTITY_TYPE_OSD,
2564 auth);
2178 if (ret) 2565 if (ret)
2179 return ERR_PTR(ret); 2566 return ERR_PTR(ret);
2180 } 2567 }
@@ -2190,11 +2577,7 @@ static int verify_authorizer_reply(struct ceph_connection *con, int len)
2190 struct ceph_osd_client *osdc = o->o_osdc; 2577 struct ceph_osd_client *osdc = o->o_osdc;
2191 struct ceph_auth_client *ac = osdc->client->monc.auth; 2578 struct ceph_auth_client *ac = osdc->client->monc.auth;
2192 2579
2193 /* 2580 return ceph_auth_verify_authorizer_reply(ac, o->o_auth.authorizer, len);
2194 * XXX If ac->ops or ac->ops->verify_authorizer_reply is null,
2195 * XXX which do we do: succeed or fail?
2196 */
2197 return ac->ops->verify_authorizer_reply(ac, o->o_auth.authorizer, len);
2198} 2581}
2199 2582
2200static int invalidate_authorizer(struct ceph_connection *con) 2583static int invalidate_authorizer(struct ceph_connection *con)
@@ -2203,9 +2586,7 @@ static int invalidate_authorizer(struct ceph_connection *con)
2203 struct ceph_osd_client *osdc = o->o_osdc; 2586 struct ceph_osd_client *osdc = o->o_osdc;
2204 struct ceph_auth_client *ac = osdc->client->monc.auth; 2587 struct ceph_auth_client *ac = osdc->client->monc.auth;
2205 2588
2206 if (ac->ops && ac->ops->invalidate_authorizer) 2589 ceph_auth_invalidate_authorizer(ac, CEPH_ENTITY_TYPE_OSD);
2207 ac->ops->invalidate_authorizer(ac, CEPH_ENTITY_TYPE_OSD);
2208
2209 return ceph_monc_validate_auth(&osdc->client->monc); 2590 return ceph_monc_validate_auth(&osdc->client->monc);
2210} 2591}
2211 2592
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 4543b9aba40c..603ddd92db19 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -654,24 +654,6 @@ static int osdmap_set_max_osd(struct ceph_osdmap *map, int max)
654 return 0; 654 return 0;
655} 655}
656 656
657static int __decode_pgid(void **p, void *end, struct ceph_pg *pg)
658{
659 u8 v;
660
661 ceph_decode_need(p, end, 1+8+4+4, bad);
662 v = ceph_decode_8(p);
663 if (v != 1)
664 goto bad;
665 pg->pool = ceph_decode_64(p);
666 pg->seed = ceph_decode_32(p);
667 *p += 4; /* skip preferred */
668 return 0;
669
670bad:
671 dout("error decoding pgid\n");
672 return -EINVAL;
673}
674
675/* 657/*
676 * decode a full map. 658 * decode a full map.
677 */ 659 */
@@ -765,7 +747,7 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
765 struct ceph_pg pgid; 747 struct ceph_pg pgid;
766 struct ceph_pg_mapping *pg; 748 struct ceph_pg_mapping *pg;
767 749
768 err = __decode_pgid(p, end, &pgid); 750 err = ceph_decode_pgid(p, end, &pgid);
769 if (err) 751 if (err)
770 goto bad; 752 goto bad;
771 ceph_decode_need(p, end, sizeof(u32), bad); 753 ceph_decode_need(p, end, sizeof(u32), bad);
@@ -983,7 +965,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
983 struct ceph_pg pgid; 965 struct ceph_pg pgid;
984 u32 pglen; 966 u32 pglen;
985 967
986 err = __decode_pgid(p, end, &pgid); 968 err = ceph_decode_pgid(p, end, &pgid);
987 if (err) 969 if (err)
988 goto bad; 970 goto bad;
989 ceph_decode_need(p, end, sizeof(u32), bad); 971 ceph_decode_need(p, end, sizeof(u32), bad);
@@ -1111,27 +1093,22 @@ EXPORT_SYMBOL(ceph_calc_file_object_mapping);
1111 * calculate an object layout (i.e. pgid) from an oid, 1093 * calculate an object layout (i.e. pgid) from an oid,
1112 * file_layout, and osdmap 1094 * file_layout, and osdmap
1113 */ 1095 */
1114int ceph_calc_object_layout(struct ceph_pg *pg, 1096int ceph_calc_ceph_pg(struct ceph_pg *pg, const char *oid,
1115 const char *oid, 1097 struct ceph_osdmap *osdmap, uint64_t pool)
1116 struct ceph_file_layout *fl,
1117 struct ceph_osdmap *osdmap)
1118{ 1098{
1119 unsigned int num, num_mask; 1099 struct ceph_pg_pool_info *pool_info;
1120 struct ceph_pg_pool_info *pool;
1121 1100
1122 BUG_ON(!osdmap); 1101 BUG_ON(!osdmap);
1123 pg->pool = le32_to_cpu(fl->fl_pg_pool); 1102 pool_info = __lookup_pg_pool(&osdmap->pg_pools, pool);
1124 pool = __lookup_pg_pool(&osdmap->pg_pools, pg->pool); 1103 if (!pool_info)
1125 if (!pool)
1126 return -EIO; 1104 return -EIO;
1127 pg->seed = ceph_str_hash(pool->object_hash, oid, strlen(oid)); 1105 pg->pool = pool;
1128 num = pool->pg_num; 1106 pg->seed = ceph_str_hash(pool_info->object_hash, oid, strlen(oid));
1129 num_mask = pool->pg_num_mask;
1130 1107
1131 dout("calc_object_layout '%s' pgid %lld.%x\n", oid, pg->pool, pg->seed); 1108 dout("%s '%s' pgid %lld.%x\n", __func__, oid, pg->pool, pg->seed);
1132 return 0; 1109 return 0;
1133} 1110}
1134EXPORT_SYMBOL(ceph_calc_object_layout); 1111EXPORT_SYMBOL(ceph_calc_ceph_pg);
1135 1112
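
ceph_calc_ceph_pg() now takes the pool id explicitly instead of pulling it out of a file layout, so callers like __map_request() above pass ceph_file_layout_pg_pool() at the call site. A minimal sketch of the same call shape (the wrapper name is illustrative):

/* Map an object name to a placement group, as __map_request() now does. */
static int example_map_object(struct ceph_osdmap *osdmap, const char *oid,
                              struct ceph_file_layout *layout,
                              struct ceph_pg *pgid)
{
        return ceph_calc_ceph_pg(pgid, oid, osdmap,
                                 ceph_file_layout_pg_pool(*layout));
}
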
1136/* 1113/*
1137 * Calculate raw osd vector for the given pgid. Return pointer to osd 1114 * Calculate raw osd vector for the given pgid. Return pointer to osd
diff --git a/net/ceph/snapshot.c b/net/ceph/snapshot.c
new file mode 100644
index 000000000000..154683f5f14c
--- /dev/null
+++ b/net/ceph/snapshot.c
@@ -0,0 +1,78 @@
1/*
2 * snapshot.c Ceph snapshot context utility routines (part of libceph)
3 *
4 * Copyright (C) 2013 Inktank Storage, Inc.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * version 2 as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 * 02110-1301, USA.
19 */
20
21#include <stddef.h>
22
23#include <linux/types.h>
24#include <linux/export.h>
25#include <linux/ceph/libceph.h>
26
27/*
28 * Ceph snapshot contexts are reference counted objects, and the
29 * returned structure holds a single reference. Acquire additional
30 * references with ceph_get_snap_context(), and release them with
31 * ceph_put_snap_context(). When the reference count reaches zero
32 * the entire structure is freed.
33 */
34
35/*
36 * Create a new ceph snapshot context large enough to hold the
37 * indicated number of snapshot ids (which can be 0). Caller has
38 * to fill in snapc->seq and snapc->snaps[0..snap_count-1].
39 *
40 * Returns a null pointer if an error occurs.
41 */
42struct ceph_snap_context *ceph_create_snap_context(u32 snap_count,
43 gfp_t gfp_flags)
44{
45 struct ceph_snap_context *snapc;
46 size_t size;
47
48 size = sizeof (struct ceph_snap_context);
49 size += snap_count * sizeof (snapc->snaps[0]);
50 snapc = kzalloc(size, gfp_flags);
51 if (!snapc)
52 return NULL;
53
54 atomic_set(&snapc->nref, 1);
55 snapc->num_snaps = snap_count;
56
57 return snapc;
58}
59EXPORT_SYMBOL(ceph_create_snap_context);
60
61struct ceph_snap_context *ceph_get_snap_context(struct ceph_snap_context *sc)
62{
63 if (sc)
64 atomic_inc(&sc->nref);
65 return sc;
66}
67EXPORT_SYMBOL(ceph_get_snap_context);
68
69void ceph_put_snap_context(struct ceph_snap_context *sc)
70{
71 if (!sc)
72 return;
73 if (atomic_dec_and_test(&sc->nref)) {
74 /*printk(" deleting snap_context %p\n", sc);*/
75 kfree(sc);
76 }
77}
78EXPORT_SYMBOL(ceph_put_snap_context);
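
A short usage sketch for the helpers above: create a context with room for two snapshot ids, fill in seq and snaps as the comment requires, and balance every extra get with a put. The ids here are made up; real callers take them from snapshot metadata.

static struct ceph_snap_context *example_snap_context(void)
{
        struct ceph_snap_context *snapc;

        snapc = ceph_create_snap_context(2, GFP_KERNEL);
        if (!snapc)
                return NULL;            /* on success we hold one reference */

        snapc->seq = 10;
        snapc->snaps[0] = 10;           /* most recent snapshot id */
        snapc->snaps[1] = 7;

        ceph_get_snap_context(snapc);   /* reference for a second user... */
        ceph_put_snap_context(snapc);   /* ...released when that user is done */

        return snapc;                   /* caller drops the last reference */
}
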
diff --git a/net/core/dev.c b/net/core/dev.c
index 4040673f806a..40b1fadaf637 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2456,7 +2456,7 @@ EXPORT_SYMBOL(netif_skb_features);
2456 * 2. skb is fragmented and the device does not support SG. 2456 * 2. skb is fragmented and the device does not support SG.
2457 */ 2457 */
2458static inline int skb_needs_linearize(struct sk_buff *skb, 2458static inline int skb_needs_linearize(struct sk_buff *skb,
2459 int features) 2459 netdev_features_t features)
2460{ 2460{
2461 return skb_is_nonlinear(skb) && 2461 return skb_is_nonlinear(skb) &&
2462 ((skb_has_frag_list(skb) && 2462 ((skb_has_frag_list(skb) &&
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 5a934ef90f8b..22efdaa76ebf 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1421,7 +1421,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
1421 void __user *useraddr = ifr->ifr_data; 1421 void __user *useraddr = ifr->ifr_data;
1422 u32 ethcmd; 1422 u32 ethcmd;
1423 int rc; 1423 int rc;
1424 u32 old_features; 1424 netdev_features_t old_features;
1425 1425
1426 if (!dev || !netif_device_present(dev)) 1426 if (!dev || !netif_device_present(dev))
1427 return -ENODEV; 1427 return -ENODEV;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 89a3a07d85fb..5c56b217b999 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2705,7 +2705,7 @@ static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2705 2705
2706 if (!ret) { 2706 if (!ret) {
2707 struct seq_file *sf = file->private_data; 2707 struct seq_file *sf = file->private_data;
2708 sf->private = PDE(inode)->data; 2708 sf->private = PDE_DATA(inode);
2709 } 2709 }
2710 return ret; 2710 return ret;
2711}; 2711};
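
The PDE(inode)->data to PDE_DATA(inode) conversion seen here recurs across the proc users in this patch (pktgen, tcp, udp, x_tables, xt_recent, ...). A minimal example of the new style, with a hypothetical show routine:

static int example_seq_show(struct seq_file *m, void *v)
{
        seq_printf(m, "private=%p\n", m->private);
        return 0;
}

static int example_seq_open(struct inode *inode, struct file *file)
{
        /* PDE_DATA() returns the pointer passed to proc_create_data() */
        return single_open(file, example_seq_show, PDE_DATA(inode));
}
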
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 7427ab5e27d8..981fed397d1d 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -606,21 +606,11 @@ static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
606 return sprintf(buf, "%lu\n", val); 606 return sprintf(buf, "%lu\n", val);
607} 607}
608 608
609static void rps_dev_flow_table_release_work(struct work_struct *work)
610{
611 struct rps_dev_flow_table *table = container_of(work,
612 struct rps_dev_flow_table, free_work);
613
614 vfree(table);
615}
616
617static void rps_dev_flow_table_release(struct rcu_head *rcu) 609static void rps_dev_flow_table_release(struct rcu_head *rcu)
618{ 610{
619 struct rps_dev_flow_table *table = container_of(rcu, 611 struct rps_dev_flow_table *table = container_of(rcu,
620 struct rps_dev_flow_table, rcu); 612 struct rps_dev_flow_table, rcu);
621 613 vfree(table);
622 INIT_WORK(&table->free_work, rps_dev_flow_table_release_work);
623 schedule_work(&table->free_work);
624} 614}
625 615
626static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, 616static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 80e271d9e64b..f97652036754 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -10,7 +10,8 @@
10#include <linux/idr.h> 10#include <linux/idr.h>
11#include <linux/rculist.h> 11#include <linux/rculist.h>
12#include <linux/nsproxy.h> 12#include <linux/nsproxy.h>
13#include <linux/proc_fs.h> 13#include <linux/fs.h>
14#include <linux/proc_ns.h>
14#include <linux/file.h> 15#include <linux/file.h>
15#include <linux/export.h> 16#include <linux/export.h>
16#include <linux/user_namespace.h> 17#include <linux/user_namespace.h>
@@ -336,7 +337,7 @@ EXPORT_SYMBOL_GPL(__put_net);
336 337
337struct net *get_net_ns_by_fd(int fd) 338struct net *get_net_ns_by_fd(int fd)
338{ 339{
339 struct proc_inode *ei; 340 struct proc_ns *ei;
340 struct file *file; 341 struct file *file;
341 struct net *net; 342 struct net *net;
342 343
@@ -344,7 +345,7 @@ struct net *get_net_ns_by_fd(int fd)
344 if (IS_ERR(file)) 345 if (IS_ERR(file))
345 return ERR_CAST(file); 346 return ERR_CAST(file);
346 347
347 ei = PROC_I(file_inode(file)); 348 ei = get_proc_ns(file_inode(file));
348 if (ei->ns_ops == &netns_operations) 349 if (ei->ns_ops == &netns_operations)
349 net = get_net(ei->ns); 350 net = get_net(ei->ns);
350 else 351 else
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index a5802a8b652f..cec074be8c43 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -206,7 +206,7 @@ static void netpoll_poll_dev(struct net_device *dev)
206 * the dev_open/close paths use this to block netpoll activity 206 * the dev_open/close paths use this to block netpoll activity
207 * while changing device state 207 * while changing device state
208 */ 208 */
209 if (!down_trylock(&ni->dev_lock)) 209 if (down_trylock(&ni->dev_lock))
210 return; 210 return;
211 211
212 if (!netif_running(dev)) { 212 if (!netif_running(dev)) {
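
The netpoll change above is a logic fix: down_trylock() returns 0 when it takes the semaphore and nonzero when it does not, the opposite sense of mutex_trylock(), so bailing out on a busy lock needs the unnegated test. Sketch of the corrected pattern:

static void example_poll(struct semaphore *dev_lock)
{
        if (down_trylock(dev_lock))
                return;                 /* busy: device state is changing */

        /* ... safe to poll the device here ... */

        up(dev_lock);
}
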
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 5c217427a669..11f2704c3810 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -508,7 +508,7 @@ out:
508 508
509static int pgctrl_open(struct inode *inode, struct file *file) 509static int pgctrl_open(struct inode *inode, struct file *file)
510{ 510{
511 return single_open(file, pgctrl_show, PDE(inode)->data); 511 return single_open(file, pgctrl_show, PDE_DATA(inode));
512} 512}
513 513
514static const struct file_operations pktgen_fops = { 514static const struct file_operations pktgen_fops = {
@@ -1685,7 +1685,7 @@ static ssize_t pktgen_if_write(struct file *file,
1685 1685
1686static int pktgen_if_open(struct inode *inode, struct file *file) 1686static int pktgen_if_open(struct inode *inode, struct file *file)
1687{ 1687{
1688 return single_open(file, pktgen_if_show, PDE(inode)->data); 1688 return single_open(file, pktgen_if_show, PDE_DATA(inode));
1689} 1689}
1690 1690
1691static const struct file_operations pktgen_if_fops = { 1691static const struct file_operations pktgen_if_fops = {
@@ -1823,7 +1823,7 @@ out:
1823 1823
1824static int pktgen_thread_open(struct inode *inode, struct file *file) 1824static int pktgen_thread_open(struct inode *inode, struct file *file)
1825{ 1825{
1826 return single_open(file, pktgen_thread_show, PDE(inode)->data); 1826 return single_open(file, pktgen_thread_show, PDE_DATA(inode));
1827} 1827}
1828 1828
1829static const struct file_operations pktgen_thread_fops = { 1829static const struct file_operations pktgen_thread_fops = {
@@ -1904,7 +1904,7 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d
1904 if (pkt_dev->odev != dev) 1904 if (pkt_dev->odev != dev)
1905 continue; 1905 continue;
1906 1906
1907 remove_proc_entry(pkt_dev->entry->name, pn->proc_dir); 1907 proc_remove(pkt_dev->entry);
1908 1908
1909 pkt_dev->entry = proc_create_data(dev->name, 0600, 1909 pkt_dev->entry = proc_create_data(dev->name, 0600,
1910 pn->proc_dir, 1910 pn->proc_dir,
@@ -3574,8 +3574,6 @@ static void _rem_dev_from_if_list(struct pktgen_thread *t,
3574static int pktgen_remove_device(struct pktgen_thread *t, 3574static int pktgen_remove_device(struct pktgen_thread *t,
3575 struct pktgen_dev *pkt_dev) 3575 struct pktgen_dev *pkt_dev)
3576{ 3576{
3577 struct pktgen_net *pn = t->net;
3578
3579 pr_debug("remove_device pkt_dev=%p\n", pkt_dev); 3577 pr_debug("remove_device pkt_dev=%p\n", pkt_dev);
3580 3578
3581 if (pkt_dev->running) { 3579 if (pkt_dev->running) {
@@ -3595,7 +3593,7 @@ static int pktgen_remove_device(struct pktgen_thread *t,
3595 _rem_dev_from_if_list(t, pkt_dev); 3593 _rem_dev_from_if_list(t, pkt_dev);
3596 3594
3597 if (pkt_dev->entry) 3595 if (pkt_dev->entry)
3598 remove_proc_entry(pkt_dev->entry->name, pn->proc_dir); 3596 proc_remove(pkt_dev->entry);
3599 3597
3600#ifdef CONFIG_XFRM 3598#ifdef CONFIG_XFRM
3601 free_SAs(pkt_dev); 3599 free_SAs(pkt_dev);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index c61b3bb87a16..d01be2a3ae53 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1293,6 +1293,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
1293 SKB_GSO_DODGY | 1293 SKB_GSO_DODGY |
1294 SKB_GSO_TCP_ECN | 1294 SKB_GSO_TCP_ECN |
1295 SKB_GSO_GRE | 1295 SKB_GSO_GRE |
1296 SKB_GSO_TCPV6 |
1296 SKB_GSO_UDP_TUNNEL | 1297 SKB_GSO_UDP_TUNNEL |
1297 0))) 1298 0)))
1298 goto out; 1299 goto out;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index ff06b7543d9f..49616fed9340 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -125,7 +125,6 @@ struct tnode {
125 unsigned int empty_children; /* KEYLENGTH bits needed */ 125 unsigned int empty_children; /* KEYLENGTH bits needed */
126 union { 126 union {
127 struct rcu_head rcu; 127 struct rcu_head rcu;
128 struct work_struct work;
129 struct tnode *tnode_free; 128 struct tnode *tnode_free;
130 }; 129 };
131 struct rt_trie_node __rcu *child[0]; 130 struct rt_trie_node __rcu *child[0];
@@ -383,12 +382,6 @@ static struct tnode *tnode_alloc(size_t size)
383 return vzalloc(size); 382 return vzalloc(size);
384} 383}
385 384
386static void __tnode_vfree(struct work_struct *arg)
387{
388 struct tnode *tn = container_of(arg, struct tnode, work);
389 vfree(tn);
390}
391
392static void __tnode_free_rcu(struct rcu_head *head) 385static void __tnode_free_rcu(struct rcu_head *head)
393{ 386{
394 struct tnode *tn = container_of(head, struct tnode, rcu); 387 struct tnode *tn = container_of(head, struct tnode, rcu);
@@ -397,10 +390,8 @@ static void __tnode_free_rcu(struct rcu_head *head)
397 390
398 if (size <= PAGE_SIZE) 391 if (size <= PAGE_SIZE)
399 kfree(tn); 392 kfree(tn);
400 else { 393 else
401 INIT_WORK(&tn->work, __tnode_vfree); 394 vfree(tn);
402 schedule_work(&tn->work);
403 }
404} 395}
405 396
406static inline void tnode_free(struct tnode *tn) 397static inline void tnode_free(struct tnode *tn)
diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c
index d2d5a99fba09..cc22363965d2 100644
--- a/net/ipv4/gre.c
+++ b/net/ipv4/gre.c
@@ -121,6 +121,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
121 int ghl = GRE_HEADER_SECTION; 121 int ghl = GRE_HEADER_SECTION;
122 struct gre_base_hdr *greh; 122 struct gre_base_hdr *greh;
123 int mac_len = skb->mac_len; 123 int mac_len = skb->mac_len;
124 __be16 protocol = skb->protocol;
124 int tnl_hlen; 125 int tnl_hlen;
125 bool csum; 126 bool csum;
126 127
@@ -150,7 +151,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
150 151
151 /* setup inner skb. */ 152 /* setup inner skb. */
152 if (greh->protocol == htons(ETH_P_TEB)) { 153 if (greh->protocol == htons(ETH_P_TEB)) {
153 struct ethhdr *eth = eth_hdr(skb); 154 struct ethhdr *eth = (struct ethhdr *)skb_inner_mac_header(skb);
154 skb->protocol = eth->h_proto; 155 skb->protocol = eth->h_proto;
155 } else { 156 } else {
156 skb->protocol = greh->protocol; 157 skb->protocol = greh->protocol;
@@ -199,6 +200,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
199 skb_reset_mac_header(skb); 200 skb_reset_mac_header(skb);
200 skb_set_network_header(skb, mac_len); 201 skb_set_network_header(skb, mac_len);
201 skb->mac_len = mac_len; 202 skb->mac_len = mac_len;
203 skb->protocol = protocol;
202 } while ((skb = skb->next)); 204 } while ((skb = skb->next));
203out: 205out:
204 return segs; 206 return segs;
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index e97d66a1fdde..7e06641e36ae 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -305,6 +305,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
305 setup_timer(&q->timer, f->frag_expire, (unsigned long)q); 305 setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
306 spin_lock_init(&q->lock); 306 spin_lock_init(&q->lock);
307 atomic_set(&q->refcnt, 1); 307 atomic_set(&q->refcnt, 1);
308 INIT_LIST_HEAD(&q->lru_list);
308 309
309 return q; 310 return q;
310} 311}
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 5852b249054f..0b732efd32e2 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -105,7 +105,7 @@ clusterip_config_entry_put(struct clusterip_config *c)
105 * functions are also incrementing the refcount on their own, 105 * functions are also incrementing the refcount on their own,
106 * so it's safe to remove the entry even if it's in use. */ 106 * so it's safe to remove the entry even if it's in use. */
107#ifdef CONFIG_PROC_FS 107#ifdef CONFIG_PROC_FS
108 remove_proc_entry(c->pde->name, c->pde->parent); 108 proc_remove(c->pde);
109#endif 109#endif
110 return; 110 return;
111 } 111 }
@@ -631,7 +631,7 @@ static int clusterip_proc_open(struct inode *inode, struct file *file)
631 631
632 if (!ret) { 632 if (!ret) {
633 struct seq_file *sf = file->private_data; 633 struct seq_file *sf = file->private_data;
634 struct clusterip_config *c = PDE(inode)->data; 634 struct clusterip_config *c = PDE_DATA(inode);
635 635
636 sf->private = c; 636 sf->private = c;
637 637
@@ -643,7 +643,7 @@ static int clusterip_proc_open(struct inode *inode, struct file *file)
643 643
644static int clusterip_proc_release(struct inode *inode, struct file *file) 644static int clusterip_proc_release(struct inode *inode, struct file *file)
645{ 645{
646 struct clusterip_config *c = PDE(inode)->data; 646 struct clusterip_config *c = PDE_DATA(inode);
647 int ret; 647 int ret;
648 648
649 ret = seq_release(inode, file); 649 ret = seq_release(inode, file);
@@ -657,7 +657,7 @@ static int clusterip_proc_release(struct inode *inode, struct file *file)
657static ssize_t clusterip_proc_write(struct file *file, const char __user *input, 657static ssize_t clusterip_proc_write(struct file *file, const char __user *input,
658 size_t size, loff_t *ofs) 658 size_t size, loff_t *ofs)
659{ 659{
660 struct clusterip_config *c = PDE(file_inode(file))->data; 660 struct clusterip_config *c = PDE_DATA(file_inode(file));
661#define PROC_WRITELEN 10 661#define PROC_WRITELEN 10
662 char buffer[PROC_WRITELEN+1]; 662 char buffer[PROC_WRITELEN+1];
663 unsigned long nodenum; 663 unsigned long nodenum;
@@ -736,7 +736,7 @@ static void __exit clusterip_tg_exit(void)
736{ 736{
737 pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION); 737 pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION);
738#ifdef CONFIG_PROC_FS 738#ifdef CONFIG_PROC_FS
739 remove_proc_entry(clusterip_procdir->name, clusterip_procdir->parent); 739 proc_remove(clusterip_procdir);
740#endif 740#endif
741 nf_unregister_hook(&cip_arp_ops); 741 nf_unregister_hook(&cip_arp_ops);
742 xt_unregister_target(&clusterip_tg_reg); 742 xt_unregister_target(&clusterip_tg_reg);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index d979657b8a12..719652305a29 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2583,7 +2583,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
2583 2583
2584int tcp_seq_open(struct inode *inode, struct file *file) 2584int tcp_seq_open(struct inode *inode, struct file *file)
2585{ 2585{
2586 struct tcp_seq_afinfo *afinfo = PDE(inode)->data; 2586 struct tcp_seq_afinfo *afinfo = PDE_DATA(inode);
2587 struct tcp_iter_state *s; 2587 struct tcp_iter_state *s;
2588 int err; 2588 int err;
2589 2589
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index f696d7c2e9fa..f6a005c485a9 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -96,7 +96,8 @@ struct tcpm_hash_bucket {
96 96
97static DEFINE_SPINLOCK(tcp_metrics_lock); 97static DEFINE_SPINLOCK(tcp_metrics_lock);
98 98
99static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst) 99static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst,
100 bool fastopen_clear)
100{ 101{
101 u32 val; 102 u32 val;
102 103
@@ -122,9 +123,11 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst)
122 tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING); 123 tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING);
123 tm->tcpm_ts = 0; 124 tm->tcpm_ts = 0;
124 tm->tcpm_ts_stamp = 0; 125 tm->tcpm_ts_stamp = 0;
125 tm->tcpm_fastopen.mss = 0; 126 if (fastopen_clear) {
126 tm->tcpm_fastopen.syn_loss = 0; 127 tm->tcpm_fastopen.mss = 0;
127 tm->tcpm_fastopen.cookie.len = 0; 128 tm->tcpm_fastopen.syn_loss = 0;
129 tm->tcpm_fastopen.cookie.len = 0;
130 }
128} 131}
129 132
130static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, 133static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
@@ -154,7 +157,7 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
154 } 157 }
155 tm->tcpm_addr = *addr; 158 tm->tcpm_addr = *addr;
156 159
157 tcpm_suck_dst(tm, dst); 160 tcpm_suck_dst(tm, dst, true);
158 161
159 if (likely(!reclaim)) { 162 if (likely(!reclaim)) {
160 tm->tcpm_next = net->ipv4.tcp_metrics_hash[hash].chain; 163 tm->tcpm_next = net->ipv4.tcp_metrics_hash[hash].chain;
@@ -171,7 +174,7 @@ out_unlock:
171static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst) 174static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst)
172{ 175{
173 if (tm && unlikely(time_after(jiffies, tm->tcpm_stamp + TCP_METRICS_TIMEOUT))) 176 if (tm && unlikely(time_after(jiffies, tm->tcpm_stamp + TCP_METRICS_TIMEOUT)))
174 tcpm_suck_dst(tm, dst); 177 tcpm_suck_dst(tm, dst, false);
175} 178}
176 179
177#define TCP_METRICS_RECLAIM_DEPTH 5 180#define TCP_METRICS_RECLAIM_DEPTH 5
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 3159d16441d0..0ae038a4c7a8 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2100,7 +2100,7 @@ static void udp_seq_stop(struct seq_file *seq, void *v)
2100 2100
2101int udp_seq_open(struct inode *inode, struct file *file) 2101int udp_seq_open(struct inode *inode, struct file *file)
2102{ 2102{
2103 struct udp_seq_afinfo *afinfo = PDE(inode)->data; 2103 struct udp_seq_afinfo *afinfo = PDE_DATA(inode);
2104 struct udp_iter_state *s; 2104 struct udp_iter_state *s;
2105 int err; 2105 int err;
2106 2106
@@ -2311,8 +2311,10 @@ static struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
2311 struct sk_buff *segs = ERR_PTR(-EINVAL); 2311 struct sk_buff *segs = ERR_PTR(-EINVAL);
2312 int mac_len = skb->mac_len; 2312 int mac_len = skb->mac_len;
2313 int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); 2313 int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
2314 int outer_hlen; 2314 struct ethhdr *inner_eth = (struct ethhdr *)skb_inner_mac_header(skb);
2315 __be16 protocol = skb->protocol;
2315 netdev_features_t enc_features; 2316 netdev_features_t enc_features;
2317 int outer_hlen;
2316 2318
2317 if (unlikely(!pskb_may_pull(skb, tnl_hlen))) 2319 if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
2318 goto out; 2320 goto out;
@@ -2322,6 +2324,8 @@ static struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
2322 skb_reset_mac_header(skb); 2324 skb_reset_mac_header(skb);
2323 skb_set_network_header(skb, skb_inner_network_offset(skb)); 2325 skb_set_network_header(skb, skb_inner_network_offset(skb));
2324 skb->mac_len = skb_inner_network_offset(skb); 2326 skb->mac_len = skb_inner_network_offset(skb);
2327 inner_eth = (struct ethhdr *)skb_mac_header(skb);
2328 skb->protocol = inner_eth->h_proto;
2325 2329
2326 /* segment inner packet. */ 2330 /* segment inner packet. */
2327 enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); 2331 enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
@@ -2358,6 +2362,7 @@ static struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
2358 2362
2359 } 2363 }
2360 skb->ip_summed = CHECKSUM_NONE; 2364 skb->ip_summed = CHECKSUM_NONE;
2365 skb->protocol = protocol;
2361 } while ((skb = skb->next)); 2366 } while ((skb = skb->next));
2362out: 2367out:
2363 return segs; 2368 return segs;
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 115cc58898f5..f3c1ff4357ff 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -251,7 +251,7 @@ static int snmp6_dev_seq_show(struct seq_file *seq, void *v)
251 251
252static int snmp6_dev_seq_open(struct inode *inode, struct file *file) 252static int snmp6_dev_seq_open(struct inode *inode, struct file *file)
253{ 253{
254 return single_open(file, snmp6_dev_seq_show, PDE(inode)->data); 254 return single_open(file, snmp6_dev_seq_show, PDE_DATA(inode));
255} 255}
256 256
257static const struct file_operations snmp6_dev_seq_fops = { 257static const struct file_operations snmp6_dev_seq_fops = {
@@ -291,8 +291,7 @@ int snmp6_unregister_dev(struct inet6_dev *idev)
291 return -ENOENT; 291 return -ENOENT;
292 if (!idev->stats.proc_dir_entry) 292 if (!idev->stats.proc_dir_entry)
293 return -EINVAL; 293 return -EINVAL;
294 remove_proc_entry(idev->stats.proc_dir_entry->name, 294 proc_remove(idev->stats.proc_dir_entry);
295 net->mib.proc_net_devsnmp6);
296 idev->stats.proc_dir_entry = NULL; 295 idev->stats.proc_dir_entry = NULL;
297 return 0; 296 return 0;
298} 297}
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 1a73b18683b6..8b03028cca69 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1000,7 +1000,7 @@ static int xt_table_open(struct inode *inode, struct file *file)
1000 sizeof(struct xt_names_priv)); 1000 sizeof(struct xt_names_priv));
1001 if (!ret) { 1001 if (!ret) {
1002 priv = ((struct seq_file *)file->private_data)->private; 1002 priv = ((struct seq_file *)file->private_data)->private;
1003 priv->af = (unsigned long)PDE(inode)->data; 1003 priv->af = (unsigned long)PDE_DATA(inode);
1004 } 1004 }
1005 return ret; 1005 return ret;
1006} 1006}
@@ -1148,7 +1148,7 @@ static int xt_match_open(struct inode *inode, struct file *file)
1148 1148
1149 seq = file->private_data; 1149 seq = file->private_data;
1150 seq->private = trav; 1150 seq->private = trav;
1151 trav->nfproto = (unsigned long)PDE(inode)->data; 1151 trav->nfproto = (unsigned long)PDE_DATA(inode);
1152 return 0; 1152 return 0;
1153} 1153}
1154 1154
@@ -1212,7 +1212,7 @@ static int xt_target_open(struct inode *inode, struct file *file)
1212 1212
1213 seq = file->private_data; 1213 seq = file->private_data;
1214 seq->private = trav; 1214 seq->private = trav;
1215 trav->nfproto = (unsigned long)PDE(inode)->data; 1215 trav->nfproto = (unsigned long)PDE_DATA(inode);
1216 return 0; 1216 return 0;
1217} 1217}
1218 1218
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 0199e7bb8f81..9ff035c71403 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -108,6 +108,7 @@ struct xt_hashlimit_htable {
108 108
109 /* seq_file stuff */ 109 /* seq_file stuff */
110 struct proc_dir_entry *pde; 110 struct proc_dir_entry *pde;
111 const char *name;
111 struct net *net; 112 struct net *net;
112 113
113 struct hlist_head hash[0]; /* hashtable itself */ 114 struct hlist_head hash[0]; /* hashtable itself */
@@ -254,6 +255,11 @@ static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo,
254 hinfo->count = 0; 255 hinfo->count = 0;
255 hinfo->family = family; 256 hinfo->family = family;
256 hinfo->rnd_initialized = false; 257 hinfo->rnd_initialized = false;
258 hinfo->name = kstrdup(minfo->name, GFP_KERNEL);
259 if (!hinfo->name) {
260 vfree(hinfo);
261 return -ENOMEM;
262 }
257 spin_lock_init(&hinfo->lock); 263 spin_lock_init(&hinfo->lock);
258 264
259 hinfo->pde = proc_create_data(minfo->name, 0, 265 hinfo->pde = proc_create_data(minfo->name, 0,
@@ -261,6 +267,7 @@ static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo,
261 hashlimit_net->ipt_hashlimit : hashlimit_net->ip6t_hashlimit, 267 hashlimit_net->ipt_hashlimit : hashlimit_net->ip6t_hashlimit,
262 &dl_file_ops, hinfo); 268 &dl_file_ops, hinfo);
263 if (hinfo->pde == NULL) { 269 if (hinfo->pde == NULL) {
270 kfree(hinfo->name);
264 vfree(hinfo); 271 vfree(hinfo);
265 return -ENOMEM; 272 return -ENOMEM;
266 } 273 }
@@ -331,9 +338,10 @@ static void htable_destroy(struct xt_hashlimit_htable *hinfo)
331 parent = hashlimit_net->ip6t_hashlimit; 338 parent = hashlimit_net->ip6t_hashlimit;
332 339
333 if(parent != NULL) 340 if(parent != NULL)
334 remove_proc_entry(hinfo->pde->name, parent); 341 remove_proc_entry(hinfo->name, parent);
335 342
336 htable_selective_cleanup(hinfo, select_all); 343 htable_selective_cleanup(hinfo, select_all);
344 kfree(hinfo->name);
337 vfree(hinfo); 345 vfree(hinfo);
338} 346}
339 347
@@ -345,7 +353,7 @@ static struct xt_hashlimit_htable *htable_find_get(struct net *net,
345 struct xt_hashlimit_htable *hinfo; 353 struct xt_hashlimit_htable *hinfo;
346 354
347 hlist_for_each_entry(hinfo, &hashlimit_net->htables, node) { 355 hlist_for_each_entry(hinfo, &hashlimit_net->htables, node) {
348 if (!strcmp(name, hinfo->pde->name) && 356 if (!strcmp(name, hinfo->name) &&
349 hinfo->family == family) { 357 hinfo->family == family) {
350 hinfo->use++; 358 hinfo->use++;
351 return hinfo; 359 return hinfo;
@@ -842,7 +850,7 @@ static int dl_proc_open(struct inode *inode, struct file *file)
842 850
843 if (!ret) { 851 if (!ret) {
844 struct seq_file *sf = file->private_data; 852 struct seq_file *sf = file->private_data;
845 sf->private = PDE(inode)->data; 853 sf->private = PDE_DATA(inode);
846 } 854 }
847 return ret; 855 return ret;
848} 856}
@@ -888,7 +896,7 @@ static void __net_exit hashlimit_proc_net_exit(struct net *net)
888 pde = hashlimit_net->ip6t_hashlimit; 896 pde = hashlimit_net->ip6t_hashlimit;
889 897
890 hlist_for_each_entry(hinfo, &hashlimit_net->htables, node) 898 hlist_for_each_entry(hinfo, &hashlimit_net->htables, node)
891 remove_proc_entry(hinfo->pde->name, pde); 899 remove_proc_entry(hinfo->name, pde);
892 900
893 hashlimit_net->ipt_hashlimit = NULL; 901 hashlimit_net->ipt_hashlimit = NULL;
894 hashlimit_net->ip6t_hashlimit = NULL; 902 hashlimit_net->ip6t_hashlimit = NULL;
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index d9cad315229d..1e657cf715c4 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -401,8 +401,7 @@ static int recent_mt_check(const struct xt_mtchk_param *par,
401 ret = -ENOMEM; 401 ret = -ENOMEM;
402 goto out; 402 goto out;
403 } 403 }
404 pde->uid = uid; 404 proc_set_user(pde, uid, gid);
405 pde->gid = gid;
406#endif 405#endif
407 spin_lock_bh(&recent_lock); 406 spin_lock_bh(&recent_lock);
408 list_add_tail(&t->list, &recent_net->tables); 407 list_add_tail(&t->list, &recent_net->tables);
@@ -525,14 +524,13 @@ static const struct seq_operations recent_seq_ops = {
525 524
526static int recent_seq_open(struct inode *inode, struct file *file) 525static int recent_seq_open(struct inode *inode, struct file *file)
527{ 526{
528 struct proc_dir_entry *pde = PDE(inode);
529 struct recent_iter_state *st; 527 struct recent_iter_state *st;
530 528
531 st = __seq_open_private(file, &recent_seq_ops, sizeof(*st)); 529 st = __seq_open_private(file, &recent_seq_ops, sizeof(*st));
532 if (st == NULL) 530 if (st == NULL)
533 return -ENOMEM; 531 return -ENOMEM;
534 532
535 st->table = pde->data; 533 st->table = PDE_DATA(inode);
536 return 0; 534 return 0;
537} 535}
538 536
@@ -540,8 +538,7 @@ static ssize_t
540recent_mt_proc_write(struct file *file, const char __user *input, 538recent_mt_proc_write(struct file *file, const char __user *input,
541 size_t size, loff_t *loff) 539 size_t size, loff_t *loff)
542{ 540{
543 const struct proc_dir_entry *pde = PDE(file_inode(file)); 541 struct recent_table *t = PDE_DATA(file_inode(file));
544 struct recent_table *t = pde->data;
545 struct recent_entry *e; 542 struct recent_entry *e;
546 char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:5afe:c0de")]; 543 char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:5afe:c0de")];
547 const char *c = buf; 544 const char *c = buf;
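The xt_recent and xt_hashlimit proc handlers above switch from PDE(inode)->data to PDE_DATA(inode) so they no longer reach into struct proc_dir_entry. A hedged sketch of the same pattern, with purely illustrative names (my_show, my_proc_open):

#include <linux/fs.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static int my_show(struct seq_file *m, void *v)
{
	/* m->private is whatever pointer single_open() was handed below */
	seq_printf(m, "table at %p\n", m->private);
	return 0;
}

static int my_proc_open(struct inode *inode, struct file *file)
{
	/*
	 * PDE_DATA() returns the void *data that was passed to
	 * proc_create_data(), without dereferencing struct proc_dir_entry
	 * in the caller.
	 */
	return single_open(file, my_show, PDE_DATA(inode));
}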
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index dd5cd49b0e09..8ec1bca7f859 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -742,36 +742,33 @@ static void prb_open_block(struct tpacket_kbdq_core *pkc1,
742 742
743 smp_rmb(); 743 smp_rmb();
744 744
745 if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd1))) { 745 /* We could have just memset this but we will lose the
746 * flexibility of making the priv area sticky
747 */
746 748
747 /* We could have just memset this but we will lose the 749 BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++;
748 * flexibility of making the priv area sticky 750 BLOCK_NUM_PKTS(pbd1) = 0;
749 */ 751 BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
750 BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++;
751 BLOCK_NUM_PKTS(pbd1) = 0;
752 BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
753 getnstimeofday(&ts);
754 h1->ts_first_pkt.ts_sec = ts.tv_sec;
755 h1->ts_first_pkt.ts_nsec = ts.tv_nsec;
756 pkc1->pkblk_start = (char *)pbd1;
757 pkc1->nxt_offset = pkc1->pkblk_start + BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
758 BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
759 BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN;
760 pbd1->version = pkc1->version;
761 pkc1->prev = pkc1->nxt_offset;
762 pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size;
763 prb_thaw_queue(pkc1);
764 _prb_refresh_rx_retire_blk_timer(pkc1);
765 752
766 smp_wmb(); 753 getnstimeofday(&ts);
767 754
768 return; 755 h1->ts_first_pkt.ts_sec = ts.tv_sec;
769 } 756 h1->ts_first_pkt.ts_nsec = ts.tv_nsec;
770 757
771 WARN(1, "ERROR block:%p is NOT FREE status:%d kactive_blk_num:%d\n", 758 pkc1->pkblk_start = (char *)pbd1;
772 pbd1, BLOCK_STATUS(pbd1), pkc1->kactive_blk_num); 759 pkc1->nxt_offset = pkc1->pkblk_start + BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
773 dump_stack(); 760
774 BUG(); 761 BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
762 BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN;
763
764 pbd1->version = pkc1->version;
765 pkc1->prev = pkc1->nxt_offset;
766 pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size;
767
768 prb_thaw_queue(pkc1);
769 _prb_refresh_rx_retire_blk_timer(pkc1);
770
771 smp_wmb();
775} 772}
776 773
777/* 774/*
@@ -862,10 +859,6 @@ static void prb_retire_current_block(struct tpacket_kbdq_core *pkc,
862 prb_close_block(pkc, pbd, po, status); 859 prb_close_block(pkc, pbd, po, status);
863 return; 860 return;
864 } 861 }
865
866 WARN(1, "ERROR-pbd[%d]:%p\n", pkc->kactive_blk_num, pbd);
867 dump_stack();
868 BUG();
869} 862}
870 863
871static int prb_curr_blk_in_use(struct tpacket_kbdq_core *pkc, 864static int prb_curr_blk_in_use(struct tpacket_kbdq_core *pkc,
diff --git a/net/socket.c b/net/socket.c
index 280283f03ccc..b416093997da 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1160,15 +1160,6 @@ static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1160 1160
1161static int sock_close(struct inode *inode, struct file *filp) 1161static int sock_close(struct inode *inode, struct file *filp)
1162{ 1162{
1163 /*
1164 * It was possible the inode is NULL we were
1165 * closing an unfinished socket.
1166 */
1167
1168 if (!inode) {
1169 printk(KERN_DEBUG "sock_close: NULL inode\n");
1170 return 0;
1171 }
1172 sock_release(SOCKET_I(inode)); 1163 sock_release(SOCKET_I(inode));
1173 return 0; 1164 return 0;
1174} 1165}
diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile
index 9e4cb59ef9f0..14e9e53e63d5 100644
--- a/net/sunrpc/auth_gss/Makefile
+++ b/net/sunrpc/auth_gss/Makefile
@@ -5,7 +5,8 @@
5obj-$(CONFIG_SUNRPC_GSS) += auth_rpcgss.o 5obj-$(CONFIG_SUNRPC_GSS) += auth_rpcgss.o
6 6
7auth_rpcgss-y := auth_gss.o gss_generic_token.o \ 7auth_rpcgss-y := auth_gss.o gss_generic_token.o \
8 gss_mech_switch.o svcauth_gss.o 8 gss_mech_switch.o svcauth_gss.o \
9 gss_rpc_upcall.o gss_rpc_xdr.o
9 10
10obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o 11obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o
11 12
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 51415b07174e..a764e227fdde 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -238,7 +238,7 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct
238 p = ERR_PTR(-EFAULT); 238 p = ERR_PTR(-EFAULT);
239 goto err; 239 goto err;
240 } 240 }
241 ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx, GFP_NOFS); 241 ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx, NULL, GFP_NOFS);
242 if (ret < 0) { 242 if (ret < 0) {
243 p = ERR_PTR(ret); 243 p = ERR_PTR(ret);
244 goto err; 244 goto err;
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 33255ff889c0..0d3c158ef8fa 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -679,6 +679,7 @@ out_err:
679static int 679static int
680gss_import_sec_context_kerberos(const void *p, size_t len, 680gss_import_sec_context_kerberos(const void *p, size_t len,
681 struct gss_ctx *ctx_id, 681 struct gss_ctx *ctx_id,
682 time_t *endtime,
682 gfp_t gfp_mask) 683 gfp_t gfp_mask)
683{ 684{
684 const void *end = (const void *)((const char *)p + len); 685 const void *end = (const void *)((const char *)p + len);
@@ -694,9 +695,11 @@ gss_import_sec_context_kerberos(const void *p, size_t len,
694 else 695 else
695 ret = gss_import_v2_context(p, end, ctx, gfp_mask); 696 ret = gss_import_v2_context(p, end, ctx, gfp_mask);
696 697
697 if (ret == 0) 698 if (ret == 0) {
698 ctx_id->internal_ctx_id = ctx; 699 ctx_id->internal_ctx_id = ctx;
699 else 700 if (endtime)
701 *endtime = ctx->endtime;
702 } else
700 kfree(ctx); 703 kfree(ctx);
701 704
702 dprintk("RPC: %s: returning %d\n", __func__, ret); 705 dprintk("RPC: %s: returning %d\n", __func__, ret);
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index 79881d6e68a1..defa9d33925c 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -175,7 +175,7 @@ struct gss_api_mech * gss_mech_get_by_name(const char *name)
175 return gm; 175 return gm;
176} 176}
177 177
178static struct gss_api_mech *gss_mech_get_by_OID(struct rpcsec_gss_oid *obj) 178struct gss_api_mech *gss_mech_get_by_OID(struct rpcsec_gss_oid *obj)
179{ 179{
180 struct gss_api_mech *pos, *gm = NULL; 180 struct gss_api_mech *pos, *gm = NULL;
181 char buf[32]; 181 char buf[32];
@@ -386,14 +386,15 @@ int
386gss_import_sec_context(const void *input_token, size_t bufsize, 386gss_import_sec_context(const void *input_token, size_t bufsize,
387 struct gss_api_mech *mech, 387 struct gss_api_mech *mech,
388 struct gss_ctx **ctx_id, 388 struct gss_ctx **ctx_id,
389 time_t *endtime,
389 gfp_t gfp_mask) 390 gfp_t gfp_mask)
390{ 391{
391 if (!(*ctx_id = kzalloc(sizeof(**ctx_id), gfp_mask))) 392 if (!(*ctx_id = kzalloc(sizeof(**ctx_id), gfp_mask)))
392 return -ENOMEM; 393 return -ENOMEM;
393 (*ctx_id)->mech_type = gss_mech_get(mech); 394 (*ctx_id)->mech_type = gss_mech_get(mech);
394 395
395 return mech->gm_ops 396 return mech->gm_ops->gss_import_sec_context(input_token, bufsize,
396 ->gss_import_sec_context(input_token, bufsize, *ctx_id, gfp_mask); 397 *ctx_id, endtime, gfp_mask);
397} 398}
398 399
399/* gss_get_mic: compute a mic over message and return mic_token. */ 400/* gss_get_mic: compute a mic over message and return mic_token. */
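gss_import_sec_context() above gains a time_t *endtime argument: callers that have no use for the expiry (auth_gss.c and the legacy svcauth path) pass NULL, while the gss-proxy path further down passes a pointer so it can set a cache expiry time. A hedged sketch of the two call patterns; the wrapper function and its arguments are illustrative only.

#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/sunrpc/gss_api.h>

static int import_ctx_sketch(const void *token, size_t len,
			     struct gss_api_mech *gm,
			     struct gss_ctx **ctx, bool want_expiry)
{
	time_t expiry;
	int err;

	if (!want_expiry)
		/* expiry not needed: pass NULL for endtime */
		return gss_import_sec_context(token, len, gm, ctx,
					      NULL, GFP_NOFS);

	/* expiry wanted, e.g. to set an rsc cache expiry as done below */
	err = gss_import_sec_context(token, len, gm, ctx, &expiry, GFP_KERNEL);
	if (err == 0)
		pr_debug("context expires at %ld\n", (long)expiry);
	return err;
}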
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
new file mode 100644
index 000000000000..d304f41260f2
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
@@ -0,0 +1,358 @@
1/*
2 * linux/net/sunrpc/gss_rpc_upcall.c
3 *
4 * Copyright (C) 2012 Simo Sorce <simo@redhat.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 */
20
21#include <linux/types.h>
22#include <linux/un.h>
23
24#include <linux/sunrpc/svcauth.h>
25#include "gss_rpc_upcall.h"
26
27#define GSSPROXY_SOCK_PATHNAME "/var/run/gssproxy.sock"
28
29#define GSSPROXY_PROGRAM (400112u)
30#define GSSPROXY_VERS_1 (1u)
31
32/*
33 * Encoding/Decoding functions
34 */
35
36enum {
37 GSSX_NULL = 0, /* Unused */
38 GSSX_INDICATE_MECHS = 1,
39 GSSX_GET_CALL_CONTEXT = 2,
40 GSSX_IMPORT_AND_CANON_NAME = 3,
41 GSSX_EXPORT_CRED = 4,
42 GSSX_IMPORT_CRED = 5,
43 GSSX_ACQUIRE_CRED = 6,
44 GSSX_STORE_CRED = 7,
45 GSSX_INIT_SEC_CONTEXT = 8,
46 GSSX_ACCEPT_SEC_CONTEXT = 9,
47 GSSX_RELEASE_HANDLE = 10,
48 GSSX_GET_MIC = 11,
49 GSSX_VERIFY = 12,
50 GSSX_WRAP = 13,
51 GSSX_UNWRAP = 14,
52 GSSX_WRAP_SIZE_LIMIT = 15,
53};
54
55#define PROC(proc, name) \
56[GSSX_##proc] = { \
57 .p_proc = GSSX_##proc, \
58 .p_encode = (kxdreproc_t)gssx_enc_##name, \
59 .p_decode = (kxdrdproc_t)gssx_dec_##name, \
60 .p_arglen = GSSX_ARG_##name##_sz, \
61 .p_replen = GSSX_RES_##name##_sz, \
62 .p_statidx = GSSX_##proc, \
63 .p_name = #proc, \
64}
65
66static struct rpc_procinfo gssp_procedures[] = {
67 PROC(INDICATE_MECHS, indicate_mechs),
68 PROC(GET_CALL_CONTEXT, get_call_context),
69 PROC(IMPORT_AND_CANON_NAME, import_and_canon_name),
70 PROC(EXPORT_CRED, export_cred),
71 PROC(IMPORT_CRED, import_cred),
72 PROC(ACQUIRE_CRED, acquire_cred),
73 PROC(STORE_CRED, store_cred),
74 PROC(INIT_SEC_CONTEXT, init_sec_context),
75 PROC(ACCEPT_SEC_CONTEXT, accept_sec_context),
76 PROC(RELEASE_HANDLE, release_handle),
77 PROC(GET_MIC, get_mic),
78 PROC(VERIFY, verify),
79 PROC(WRAP, wrap),
80 PROC(UNWRAP, unwrap),
81 PROC(WRAP_SIZE_LIMIT, wrap_size_limit),
82};
83
84
85
86/*
87 * Common transport functions
88 */
89
90static const struct rpc_program gssp_program;
91
92static int gssp_rpc_create(struct net *net, struct rpc_clnt **_clnt)
93{
94 static const struct sockaddr_un gssp_localaddr = {
95 .sun_family = AF_LOCAL,
96 .sun_path = GSSPROXY_SOCK_PATHNAME,
97 };
98 struct rpc_create_args args = {
99 .net = net,
100 .protocol = XPRT_TRANSPORT_LOCAL,
101 .address = (struct sockaddr *)&gssp_localaddr,
102 .addrsize = sizeof(gssp_localaddr),
103 .servername = "localhost",
104 .program = &gssp_program,
105 .version = GSSPROXY_VERS_1,
106 .authflavor = RPC_AUTH_NULL,
107 /*
108 * Note we want connection to be done in the caller's
109 * filesystem namespace. We therefore turn off the idle
110 * timeout, which would result in reconnections being
111 * done without the correct namespace:
112 */
113 .flags = RPC_CLNT_CREATE_NOPING |
114 RPC_CLNT_CREATE_NO_IDLE_TIMEOUT
115 };
116 struct rpc_clnt *clnt;
117 int result = 0;
118
119 clnt = rpc_create(&args);
120 if (IS_ERR(clnt)) {
121 dprintk("RPC: failed to create AF_LOCAL gssproxy "
122 "client (errno %ld).\n", PTR_ERR(clnt));
123 result = -PTR_ERR(clnt);
124 *_clnt = NULL;
125 goto out;
126 }
127
128 dprintk("RPC: created new gssp local client (gssp_local_clnt: "
129 "%p)\n", clnt);
130 *_clnt = clnt;
131
132out:
133 return result;
134}
135
136void init_gssp_clnt(struct sunrpc_net *sn)
137{
138 mutex_init(&sn->gssp_lock);
139 sn->gssp_clnt = NULL;
140 init_waitqueue_head(&sn->gssp_wq);
141}
142
143int set_gssp_clnt(struct net *net)
144{
145 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
146 struct rpc_clnt *clnt;
147 int ret;
148
149 mutex_lock(&sn->gssp_lock);
150 ret = gssp_rpc_create(net, &clnt);
151 if (!ret) {
152 if (sn->gssp_clnt)
153 rpc_shutdown_client(sn->gssp_clnt);
154 sn->gssp_clnt = clnt;
155 }
156 mutex_unlock(&sn->gssp_lock);
157 wake_up(&sn->gssp_wq);
158 return ret;
159}
160
161void clear_gssp_clnt(struct sunrpc_net *sn)
162{
163 mutex_lock(&sn->gssp_lock);
164 if (sn->gssp_clnt) {
165 rpc_shutdown_client(sn->gssp_clnt);
166 sn->gssp_clnt = NULL;
167 }
168 mutex_unlock(&sn->gssp_lock);
169}
170
171static struct rpc_clnt *get_gssp_clnt(struct sunrpc_net *sn)
172{
173 struct rpc_clnt *clnt;
174
175 mutex_lock(&sn->gssp_lock);
176 clnt = sn->gssp_clnt;
177 if (clnt)
178 atomic_inc(&clnt->cl_count);
179 mutex_unlock(&sn->gssp_lock);
180 return clnt;
181}
182
183static int gssp_call(struct net *net, struct rpc_message *msg)
184{
185 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
186 struct rpc_clnt *clnt;
187 int status;
188
189 clnt = get_gssp_clnt(sn);
190 if (!clnt)
191 return -EIO;
192 status = rpc_call_sync(clnt, msg, 0);
193 if (status < 0) {
194 dprintk("gssp: rpc_call returned error %d\n", -status);
195 switch (status) {
196 case -EPROTONOSUPPORT:
197 status = -EINVAL;
198 break;
199 case -ECONNREFUSED:
200 case -ETIMEDOUT:
201 case -ENOTCONN:
202 status = -EAGAIN;
203 break;
204 case -ERESTARTSYS:
205			if (signalled())
206 status = -EINTR;
207 break;
208 default:
209 break;
210 }
211 }
212 rpc_release_client(clnt);
213 return status;
214}
215
216
217/*
218 * Public functions
219 */
220
221/* numbers somewhat arbitrary but large enough for current needs */
222#define GSSX_MAX_OUT_HANDLE 128
223#define GSSX_MAX_SRC_PRINC 256
224#define GSSX_KMEMBUF (GSSX_max_output_handle_sz + \
225 GSSX_max_oid_sz + \
226 GSSX_max_princ_sz + \
227 sizeof(struct svc_cred))
228
229int gssp_accept_sec_context_upcall(struct net *net,
230 struct gssp_upcall_data *data)
231{
232 struct gssx_ctx ctxh = {
233 .state = data->in_handle
234 };
235 struct gssx_arg_accept_sec_context arg = {
236 .input_token = data->in_token,
237 };
238 struct gssx_ctx rctxh = {
239 /*
240 * pass in the max length we expect for each of these
241 * buffers but let the xdr code kmalloc them:
242 */
243 .exported_context_token.len = GSSX_max_output_handle_sz,
244 .mech.len = GSS_OID_MAX_LEN,
245 .src_name.display_name.len = GSSX_max_princ_sz
246 };
247 struct gssx_res_accept_sec_context res = {
248 .context_handle = &rctxh,
249 .output_token = &data->out_token
250 };
251 struct rpc_message msg = {
252 .rpc_proc = &gssp_procedures[GSSX_ACCEPT_SEC_CONTEXT],
253 .rpc_argp = &arg,
254 .rpc_resp = &res,
255 .rpc_cred = NULL, /* FIXME ? */
256 };
257 struct xdr_netobj client_name = { 0 , NULL };
258 int ret;
259
260 if (data->in_handle.len != 0)
261 arg.context_handle = &ctxh;
262 res.output_token->len = GSSX_max_output_token_sz;
263
264 /* use nfs/ for targ_name ? */
265
266 ret = gssp_call(net, &msg);
267
268 /* we need to fetch all data even in case of error so
269 * that we can free special strctures is they have been allocated */
270 data->major_status = res.status.major_status;
271 data->minor_status = res.status.minor_status;
272 if (res.context_handle) {
273 data->out_handle = rctxh.exported_context_token;
274 data->mech_oid.len = rctxh.mech.len;
275 memcpy(data->mech_oid.data, rctxh.mech.data,
276 data->mech_oid.len);
277 client_name = rctxh.src_name.display_name;
278 }
279
280 if (res.options.count == 1) {
281 gssx_buffer *value = &res.options.data[0].value;
282		/* Currently we only decode CREDS_VALUE; if we add
283		 * anything else we'll have to loop and match on the
284		 * option name */
285 if (value->len == 1) {
286 /* steal group info from struct svc_cred */
287 data->creds = *(struct svc_cred *)value->data;
288 data->found_creds = 1;
289 }
290 /* whether we use it or not, free data */
291 kfree(value->data);
292 }
293
294 if (res.options.count != 0) {
295 kfree(res.options.data);
296 }
297
298 /* convert to GSS_NT_HOSTBASED_SERVICE form and set into creds */
299 if (data->found_creds && client_name.data != NULL) {
300 char *c;
301
302 data->creds.cr_principal = kstrndup(client_name.data,
303 client_name.len, GFP_KERNEL);
304 if (data->creds.cr_principal) {
305 /* terminate and remove realm part */
306 c = strchr(data->creds.cr_principal, '@');
307 if (c) {
308 *c = '\0';
309
310 /* change service-hostname delimiter */
311 c = strchr(data->creds.cr_principal, '/');
312 if (c) *c = '@';
313 }
314 if (!c) {
315 /* not a service principal */
316 kfree(data->creds.cr_principal);
317 data->creds.cr_principal = NULL;
318 }
319 }
320 }
321 kfree(client_name.data);
322
323 return ret;
324}
325
326void gssp_free_upcall_data(struct gssp_upcall_data *data)
327{
328 kfree(data->in_handle.data);
329 kfree(data->out_handle.data);
330 kfree(data->out_token.data);
331 kfree(data->mech_oid.data);
332 free_svc_cred(&data->creds);
333}
334
335/*
336 * Initialization stuff
337 */
338
339static const struct rpc_version gssp_version1 = {
340 .number = GSSPROXY_VERS_1,
341 .nrprocs = ARRAY_SIZE(gssp_procedures),
342 .procs = gssp_procedures,
343};
344
345static const struct rpc_version *gssp_version[] = {
346 NULL,
347 &gssp_version1,
348};
349
350static struct rpc_stat gssp_stats;
351
352static const struct rpc_program gssp_program = {
353 .name = "gssproxy",
354 .number = GSSPROXY_PROGRAM,
355 .nrvers = ARRAY_SIZE(gssp_version),
356 .version = gssp_version,
357 .stats = &gssp_stats,
358};
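A hedged sketch of how a server-side caller is expected to drive the upcall defined above; the real user is svcauth_gss_proxy_init() later in this patch, and example_do_upcall() with its reduced error handling is illustrative only.

#include <linux/printk.h>
#include "gss_rpc_upcall.h"

static int example_do_upcall(struct net *net, struct gssp_upcall_data *ud)
{
	int status;

	/* ud->in_handle and ud->in_token must already be filled in
	 * from the client's RPCSEC_GSS init request */
	status = gssp_accept_sec_context_upcall(net, ud);
	if (status == 0)
		pr_debug("gss-proxy: major %d minor %d\n",
			 ud->major_status, ud->minor_status);

	/* free whatever the XDR layer allocated, even on error */
	gssp_free_upcall_data(ud);
	return status;
}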
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.h b/net/sunrpc/auth_gss/gss_rpc_upcall.h
new file mode 100644
index 000000000000..1e542aded90a
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.h
@@ -0,0 +1,48 @@
1/*
2 * linux/net/sunrpc/gss_rpc_upcall.h
3 *
4 * Copyright (C) 2012 Simo Sorce <simo@redhat.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 */
20
21#ifndef _GSS_RPC_UPCALL_H
22#define _GSS_RPC_UPCALL_H
23
24#include <linux/sunrpc/gss_api.h>
25#include <linux/sunrpc/auth_gss.h>
26#include "gss_rpc_xdr.h"
27#include "../netns.h"
28
29struct gssp_upcall_data {
30 struct xdr_netobj in_handle;
31 struct gssp_in_token in_token;
32 struct xdr_netobj out_handle;
33 struct xdr_netobj out_token;
34 struct rpcsec_gss_oid mech_oid;
35 struct svc_cred creds;
36 int found_creds;
37 int major_status;
38 int minor_status;
39};
40
41int gssp_accept_sec_context_upcall(struct net *net,
42 struct gssp_upcall_data *data);
43void gssp_free_upcall_data(struct gssp_upcall_data *data);
44
45void init_gssp_clnt(struct sunrpc_net *);
46int set_gssp_clnt(struct net *);
47void clear_gssp_clnt(struct sunrpc_net *);
48#endif /* _GSS_RPC_UPCALL_H */
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c
new file mode 100644
index 000000000000..5c4c61d527e2
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c
@@ -0,0 +1,838 @@
1/*
2 * GSS Proxy upcall module
3 *
4 * Copyright (C) 2012 Simo Sorce <simo@redhat.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 */
20
21#include <linux/sunrpc/svcauth.h>
22#include "gss_rpc_xdr.h"
23
24static bool gssx_check_pointer(struct xdr_stream *xdr)
25{
26 __be32 *p;
27
28 p = xdr_reserve_space(xdr, 4);
29 if (unlikely(p == NULL))
30 return -ENOSPC;
31	return *p ? true : false;
32}
33
34static int gssx_enc_bool(struct xdr_stream *xdr, int v)
35{
36 __be32 *p;
37
38 p = xdr_reserve_space(xdr, 4);
39 if (unlikely(p == NULL))
40 return -ENOSPC;
41 *p = v ? xdr_one : xdr_zero;
42 return 0;
43}
44
45static int gssx_dec_bool(struct xdr_stream *xdr, u32 *v)
46{
47 __be32 *p;
48
49 p = xdr_inline_decode(xdr, 4);
50 if (unlikely(p == NULL))
51 return -ENOSPC;
52 *v = be32_to_cpu(*p);
53 return 0;
54}
55
56static int gssx_enc_buffer(struct xdr_stream *xdr,
57 gssx_buffer *buf)
58{
59 __be32 *p;
60
61 p = xdr_reserve_space(xdr, sizeof(u32) + buf->len);
62 if (!p)
63 return -ENOSPC;
64 xdr_encode_opaque(p, buf->data, buf->len);
65 return 0;
66}
67
68static int gssx_enc_in_token(struct xdr_stream *xdr,
69 struct gssp_in_token *in)
70{
71 __be32 *p;
72
73 p = xdr_reserve_space(xdr, 4);
74 if (!p)
75 return -ENOSPC;
76 *p = cpu_to_be32(in->page_len);
77
78 /* all we need to do is to write pages */
79 xdr_write_pages(xdr, in->pages, in->page_base, in->page_len);
80
81 return 0;
82}
83
84
85static int gssx_dec_buffer(struct xdr_stream *xdr,
86 gssx_buffer *buf)
87{
88 u32 length;
89 __be32 *p;
90
91 p = xdr_inline_decode(xdr, 4);
92 if (unlikely(p == NULL))
93 return -ENOSPC;
94
95 length = be32_to_cpup(p);
96 p = xdr_inline_decode(xdr, length);
97 if (unlikely(p == NULL))
98 return -ENOSPC;
99
100 if (buf->len == 0) {
101 /* we intentionally are not interested in this buffer */
102 return 0;
103 }
104 if (length > buf->len)
105 return -ENOSPC;
106
107 if (!buf->data) {
108 buf->data = kmemdup(p, length, GFP_KERNEL);
109 if (!buf->data)
110 return -ENOMEM;
111 } else {
112 memcpy(buf->data, p, length);
113 }
114 buf->len = length;
115 return 0;
116}
117
118static int gssx_enc_option(struct xdr_stream *xdr,
119 struct gssx_option *opt)
120{
121 int err;
122
123 err = gssx_enc_buffer(xdr, &opt->option);
124 if (err)
125 return err;
126 err = gssx_enc_buffer(xdr, &opt->value);
127 return err;
128}
129
130static int gssx_dec_option(struct xdr_stream *xdr,
131 struct gssx_option *opt)
132{
133 int err;
134
135 err = gssx_dec_buffer(xdr, &opt->option);
136 if (err)
137 return err;
138 err = gssx_dec_buffer(xdr, &opt->value);
139 return err;
140}
141
142static int dummy_enc_opt_array(struct xdr_stream *xdr,
143 struct gssx_option_array *oa)
144{
145 __be32 *p;
146
147 if (oa->count != 0)
148 return -EINVAL;
149
150 p = xdr_reserve_space(xdr, 4);
151 if (!p)
152 return -ENOSPC;
153 *p = 0;
154
155 return 0;
156}
157
158static int dummy_dec_opt_array(struct xdr_stream *xdr,
159 struct gssx_option_array *oa)
160{
161 struct gssx_option dummy;
162 u32 count, i;
163 __be32 *p;
164
165 p = xdr_inline_decode(xdr, 4);
166 if (unlikely(p == NULL))
167 return -ENOSPC;
168 count = be32_to_cpup(p++);
169 memset(&dummy, 0, sizeof(dummy));
170 for (i = 0; i < count; i++) {
171 gssx_dec_option(xdr, &dummy);
172 }
173
174 oa->count = 0;
175 oa->data = NULL;
176 return 0;
177}
178
179static int get_s32(void **p, void *max, s32 *res)
180{
181 void *base = *p;
182 void *next = (void *)((char *)base + sizeof(s32));
183 if (unlikely(next > max || next < base))
184 return -EINVAL;
185 memcpy(res, base, sizeof(s32));
186 *p = next;
187 return 0;
188}
189
190static int gssx_dec_linux_creds(struct xdr_stream *xdr,
191 struct svc_cred *creds)
192{
193 u32 length;
194 __be32 *p;
195 void *q, *end;
196 s32 tmp;
197 int N, i, err;
198
199 p = xdr_inline_decode(xdr, 4);
200 if (unlikely(p == NULL))
201 return -ENOSPC;
202
203 length = be32_to_cpup(p);
204
205	/* FIXME: we do not want to use the scratch buffer for this one;
206	 * we may need to use functions that allow us to access an io vector
207	 * directly */
208 p = xdr_inline_decode(xdr, length);
209 if (unlikely(p == NULL))
210 return -ENOSPC;
211
212 q = p;
213 end = q + length;
214
215 /* uid */
216 err = get_s32(&q, end, &tmp);
217 if (err)
218 return err;
219 creds->cr_uid = make_kuid(&init_user_ns, tmp);
220
221 /* gid */
222 err = get_s32(&q, end, &tmp);
223 if (err)
224 return err;
225 creds->cr_gid = make_kgid(&init_user_ns, tmp);
226
227 /* number of additional gid's */
228 err = get_s32(&q, end, &tmp);
229 if (err)
230 return err;
231 N = tmp;
232 creds->cr_group_info = groups_alloc(N);
233 if (creds->cr_group_info == NULL)
234 return -ENOMEM;
235
236 /* gid's */
237 for (i = 0; i < N; i++) {
238 kgid_t kgid;
239 err = get_s32(&q, end, &tmp);
240 if (err)
241 goto out_free_groups;
242 err = -EINVAL;
243 kgid = make_kgid(&init_user_ns, tmp);
244 if (!gid_valid(kgid))
245 goto out_free_groups;
246 GROUP_AT(creds->cr_group_info, i) = kgid;
247 }
248
249 return 0;
250out_free_groups:
251 groups_free(creds->cr_group_info);
252 return err;
253}
254
255static int gssx_dec_option_array(struct xdr_stream *xdr,
256 struct gssx_option_array *oa)
257{
258 struct svc_cred *creds;
259 u32 count, i;
260 __be32 *p;
261 int err;
262
263 p = xdr_inline_decode(xdr, 4);
264 if (unlikely(p == NULL))
265 return -ENOSPC;
266 count = be32_to_cpup(p++);
267 if (count != 0) {
268 /* we recognize only 1 currently: CREDS_VALUE */
269 oa->count = 1;
270
271 oa->data = kmalloc(sizeof(struct gssx_option), GFP_KERNEL);
272 if (!oa->data)
273 return -ENOMEM;
274
275 creds = kmalloc(sizeof(struct svc_cred), GFP_KERNEL);
276 if (!creds) {
277 kfree(oa->data);
278 return -ENOMEM;
279 }
280
281 oa->data[0].option.data = CREDS_VALUE;
282 oa->data[0].option.len = sizeof(CREDS_VALUE);
283 oa->data[0].value.data = (void *)creds;
284 oa->data[0].value.len = 0;
285 }
286 for (i = 0; i < count; i++) {
287 gssx_buffer dummy = { 0, NULL };
288 u32 length;
289
290 /* option buffer */
291 p = xdr_inline_decode(xdr, 4);
292 if (unlikely(p == NULL))
293 return -ENOSPC;
294
295 length = be32_to_cpup(p);
296 p = xdr_inline_decode(xdr, length);
297 if (unlikely(p == NULL))
298 return -ENOSPC;
299
300 if (length == sizeof(CREDS_VALUE) &&
301 memcmp(p, CREDS_VALUE, sizeof(CREDS_VALUE)) == 0) {
302 /* We have creds here. parse them */
303 err = gssx_dec_linux_creds(xdr, creds);
304 if (err)
305 return err;
306 oa->data[0].value.len = 1; /* presence */
307 } else {
308 /* consume uninteresting buffer */
309 err = gssx_dec_buffer(xdr, &dummy);
310 if (err)
311 return err;
312 }
313 }
314 return 0;
315}
316
317static int gssx_dec_status(struct xdr_stream *xdr,
318 struct gssx_status *status)
319{
320 __be32 *p;
321 int err;
322
323 /* status->major_status */
324 p = xdr_inline_decode(xdr, 8);
325 if (unlikely(p == NULL))
326 return -ENOSPC;
327 p = xdr_decode_hyper(p, &status->major_status);
328
329 /* status->mech */
330 err = gssx_dec_buffer(xdr, &status->mech);
331 if (err)
332 return err;
333
334 /* status->minor_status */
335 p = xdr_inline_decode(xdr, 8);
336 if (unlikely(p == NULL))
337 return -ENOSPC;
338 p = xdr_decode_hyper(p, &status->minor_status);
339
340 /* status->major_status_string */
341 err = gssx_dec_buffer(xdr, &status->major_status_string);
342 if (err)
343 return err;
344
345 /* status->minor_status_string */
346 err = gssx_dec_buffer(xdr, &status->minor_status_string);
347 if (err)
348 return err;
349
350 /* status->server_ctx */
351 err = gssx_dec_buffer(xdr, &status->server_ctx);
352 if (err)
353 return err;
354
355 /* we assume we have no options for now, so simply consume them */
356 /* status->options */
357 err = dummy_dec_opt_array(xdr, &status->options);
358
359 return err;
360}
361
362static int gssx_enc_call_ctx(struct xdr_stream *xdr,
363 struct gssx_call_ctx *ctx)
364{
365 struct gssx_option opt;
366 __be32 *p;
367 int err;
368
369 /* ctx->locale */
370 err = gssx_enc_buffer(xdr, &ctx->locale);
371 if (err)
372 return err;
373
374 /* ctx->server_ctx */
375 err = gssx_enc_buffer(xdr, &ctx->server_ctx);
376 if (err)
377 return err;
378
379 /* we always want to ask for lucid contexts */
380 /* ctx->options */
381 p = xdr_reserve_space(xdr, 4);
382 *p = cpu_to_be32(2);
383
384 /* we want a lucid_v1 context */
385 opt.option.data = LUCID_OPTION;
386 opt.option.len = sizeof(LUCID_OPTION);
387 opt.value.data = LUCID_VALUE;
388 opt.value.len = sizeof(LUCID_VALUE);
389 err = gssx_enc_option(xdr, &opt);
390
391 /* ..and user creds */
392 opt.option.data = CREDS_OPTION;
393 opt.option.len = sizeof(CREDS_OPTION);
394 opt.value.data = CREDS_VALUE;
395 opt.value.len = sizeof(CREDS_VALUE);
396 err = gssx_enc_option(xdr, &opt);
397
398 return err;
399}
400
401static int gssx_dec_name_attr(struct xdr_stream *xdr,
402 struct gssx_name_attr *attr)
403{
404 int err;
405
406 /* attr->attr */
407 err = gssx_dec_buffer(xdr, &attr->attr);
408 if (err)
409 return err;
410
411 /* attr->value */
412 err = gssx_dec_buffer(xdr, &attr->value);
413 if (err)
414 return err;
415
416 /* attr->extensions */
417 err = dummy_dec_opt_array(xdr, &attr->extensions);
418
419 return err;
420}
421
422static int dummy_enc_nameattr_array(struct xdr_stream *xdr,
423 struct gssx_name_attr_array *naa)
424{
425 __be32 *p;
426
427 if (naa->count != 0)
428 return -EINVAL;
429
430 p = xdr_reserve_space(xdr, 4);
431 if (!p)
432 return -ENOSPC;
433 *p = 0;
434
435 return 0;
436}
437
438static int dummy_dec_nameattr_array(struct xdr_stream *xdr,
439 struct gssx_name_attr_array *naa)
440{
441 struct gssx_name_attr dummy;
442 u32 count, i;
443 __be32 *p;
444
445 p = xdr_inline_decode(xdr, 4);
446 if (unlikely(p == NULL))
447 return -ENOSPC;
448 count = be32_to_cpup(p++);
449 for (i = 0; i < count; i++) {
450 gssx_dec_name_attr(xdr, &dummy);
451 }
452
453 naa->count = 0;
454 naa->data = NULL;
455 return 0;
456}
457
458static struct xdr_netobj zero_netobj = {};
459
460static struct gssx_name_attr_array zero_name_attr_array = {};
461
462static struct gssx_option_array zero_option_array = {};
463
464static int gssx_enc_name(struct xdr_stream *xdr,
465 struct gssx_name *name)
466{
467 int err;
468
469 /* name->display_name */
470 err = gssx_enc_buffer(xdr, &name->display_name);
471 if (err)
472 return err;
473
474 /* name->name_type */
475 err = gssx_enc_buffer(xdr, &zero_netobj);
476 if (err)
477 return err;
478
479 /* name->exported_name */
480 err = gssx_enc_buffer(xdr, &zero_netobj);
481 if (err)
482 return err;
483
484 /* name->exported_composite_name */
485 err = gssx_enc_buffer(xdr, &zero_netobj);
486 if (err)
487 return err;
488
489 /* leave name_attributes empty for now, will add once we have any
490 * to pass up at all */
491 /* name->name_attributes */
492 err = dummy_enc_nameattr_array(xdr, &zero_name_attr_array);
493 if (err)
494 return err;
495
496 /* leave options empty for now, will add once we have any options
497 * to pass up at all */
498 /* name->extensions */
499 err = dummy_enc_opt_array(xdr, &zero_option_array);
500
501 return err;
502}
503
504static int gssx_dec_name(struct xdr_stream *xdr,
505 struct gssx_name *name)
506{
507 struct xdr_netobj dummy_netobj;
508 struct gssx_name_attr_array dummy_name_attr_array;
509 struct gssx_option_array dummy_option_array;
510 int err;
511
512 /* name->display_name */
513 err = gssx_dec_buffer(xdr, &name->display_name);
514 if (err)
515 return err;
516
517 /* name->name_type */
518 err = gssx_dec_buffer(xdr, &dummy_netobj);
519 if (err)
520 return err;
521
522 /* name->exported_name */
523 err = gssx_dec_buffer(xdr, &dummy_netobj);
524 if (err)
525 return err;
526
527 /* name->exported_composite_name */
528 err = gssx_dec_buffer(xdr, &dummy_netobj);
529 if (err)
530 return err;
531
532 /* we assume we have no attributes for now, so simply consume them */
533 /* name->name_attributes */
534 err = dummy_dec_nameattr_array(xdr, &dummy_name_attr_array);
535 if (err)
536 return err;
537
538 /* we assume we have no options for now, so simply consume them */
539 /* name->extensions */
540 err = dummy_dec_opt_array(xdr, &dummy_option_array);
541
542 return err;
543}
544
545static int dummy_enc_credel_array(struct xdr_stream *xdr,
546 struct gssx_cred_element_array *cea)
547{
548 __be32 *p;
549
550 if (cea->count != 0)
551 return -EINVAL;
552
553 p = xdr_reserve_space(xdr, 4);
554 if (!p)
555 return -ENOSPC;
556 *p = 0;
557
558 return 0;
559}
560
561static int gssx_enc_cred(struct xdr_stream *xdr,
562 struct gssx_cred *cred)
563{
564 int err;
565
566 /* cred->desired_name */
567 err = gssx_enc_name(xdr, &cred->desired_name);
568 if (err)
569 return err;
570
571 /* cred->elements */
572 err = dummy_enc_credel_array(xdr, &cred->elements);
573
574 /* cred->cred_handle_reference */
575 err = gssx_enc_buffer(xdr, &cred->cred_handle_reference);
576 if (err)
577 return err;
578
579 /* cred->needs_release */
580 err = gssx_enc_bool(xdr, cred->needs_release);
581
582 return err;
583}
584
585static int gssx_enc_ctx(struct xdr_stream *xdr,
586 struct gssx_ctx *ctx)
587{
588 __be32 *p;
589 int err;
590
591 /* ctx->exported_context_token */
592 err = gssx_enc_buffer(xdr, &ctx->exported_context_token);
593 if (err)
594 return err;
595
596 /* ctx->state */
597 err = gssx_enc_buffer(xdr, &ctx->state);
598 if (err)
599 return err;
600
601 /* ctx->need_release */
602 err = gssx_enc_bool(xdr, ctx->need_release);
603 if (err)
604 return err;
605
606 /* ctx->mech */
607 err = gssx_enc_buffer(xdr, &ctx->mech);
608 if (err)
609 return err;
610
611 /* ctx->src_name */
612 err = gssx_enc_name(xdr, &ctx->src_name);
613 if (err)
614 return err;
615
616 /* ctx->targ_name */
617 err = gssx_enc_name(xdr, &ctx->targ_name);
618 if (err)
619 return err;
620
621 /* ctx->lifetime */
622 p = xdr_reserve_space(xdr, 8+8);
623 if (!p)
624 return -ENOSPC;
625 p = xdr_encode_hyper(p, ctx->lifetime);
626
627 /* ctx->ctx_flags */
628 p = xdr_encode_hyper(p, ctx->ctx_flags);
629
630 /* ctx->locally_initiated */
631 err = gssx_enc_bool(xdr, ctx->locally_initiated);
632 if (err)
633 return err;
634
635 /* ctx->open */
636 err = gssx_enc_bool(xdr, ctx->open);
637 if (err)
638 return err;
639
640 /* leave options empty for now, will add once we have any options
641 * to pass up at all */
642 /* ctx->options */
643 err = dummy_enc_opt_array(xdr, &ctx->options);
644
645 return err;
646}
647
648static int gssx_dec_ctx(struct xdr_stream *xdr,
649 struct gssx_ctx *ctx)
650{
651 __be32 *p;
652 int err;
653
654 /* ctx->exported_context_token */
655 err = gssx_dec_buffer(xdr, &ctx->exported_context_token);
656 if (err)
657 return err;
658
659 /* ctx->state */
660 err = gssx_dec_buffer(xdr, &ctx->state);
661 if (err)
662 return err;
663
664 /* ctx->need_release */
665 err = gssx_dec_bool(xdr, &ctx->need_release);
666 if (err)
667 return err;
668
669 /* ctx->mech */
670 err = gssx_dec_buffer(xdr, &ctx->mech);
671 if (err)
672 return err;
673
674 /* ctx->src_name */
675 err = gssx_dec_name(xdr, &ctx->src_name);
676 if (err)
677 return err;
678
679 /* ctx->targ_name */
680 err = gssx_dec_name(xdr, &ctx->targ_name);
681 if (err)
682 return err;
683
684 /* ctx->lifetime */
685 p = xdr_inline_decode(xdr, 8+8);
686 if (unlikely(p == NULL))
687 return -ENOSPC;
688 p = xdr_decode_hyper(p, &ctx->lifetime);
689
690 /* ctx->ctx_flags */
691 p = xdr_decode_hyper(p, &ctx->ctx_flags);
692
693 /* ctx->locally_initiated */
694 err = gssx_dec_bool(xdr, &ctx->locally_initiated);
695 if (err)
696 return err;
697
698 /* ctx->open */
699 err = gssx_dec_bool(xdr, &ctx->open);
700 if (err)
701 return err;
702
703 /* we assume we have no options for now, so simply consume them */
704 /* ctx->options */
705 err = dummy_dec_opt_array(xdr, &ctx->options);
706
707 return err;
708}
709
710static int gssx_enc_cb(struct xdr_stream *xdr, struct gssx_cb *cb)
711{
712 __be32 *p;
713 int err;
714
715 /* cb->initiator_addrtype */
716 p = xdr_reserve_space(xdr, 8);
717 if (!p)
718 return -ENOSPC;
719 p = xdr_encode_hyper(p, cb->initiator_addrtype);
720
721 /* cb->initiator_address */
722 err = gssx_enc_buffer(xdr, &cb->initiator_address);
723 if (err)
724 return err;
725
726 /* cb->acceptor_addrtype */
727 p = xdr_reserve_space(xdr, 8);
728 if (!p)
729 return -ENOSPC;
730 p = xdr_encode_hyper(p, cb->acceptor_addrtype);
731
732 /* cb->acceptor_address */
733 err = gssx_enc_buffer(xdr, &cb->acceptor_address);
734 if (err)
735 return err;
736
737 /* cb->application_data */
738 err = gssx_enc_buffer(xdr, &cb->application_data);
739
740 return err;
741}
742
743void gssx_enc_accept_sec_context(struct rpc_rqst *req,
744 struct xdr_stream *xdr,
745 struct gssx_arg_accept_sec_context *arg)
746{
747 int err;
748
749 err = gssx_enc_call_ctx(xdr, &arg->call_ctx);
750 if (err)
751 goto done;
752
753 /* arg->context_handle */
754 if (arg->context_handle) {
755 err = gssx_enc_ctx(xdr, arg->context_handle);
756 if (err)
757 goto done;
758 } else {
759 err = gssx_enc_bool(xdr, 0);
760 }
761
762 /* arg->cred_handle */
763 if (arg->cred_handle) {
764 err = gssx_enc_cred(xdr, arg->cred_handle);
765 if (err)
766 goto done;
767 } else {
768 err = gssx_enc_bool(xdr, 0);
769 }
770
771 /* arg->input_token */
772 err = gssx_enc_in_token(xdr, &arg->input_token);
773 if (err)
774 goto done;
775
776 /* arg->input_cb */
777 if (arg->input_cb) {
778 err = gssx_enc_cb(xdr, arg->input_cb);
779 if (err)
780 goto done;
781 } else {
782 err = gssx_enc_bool(xdr, 0);
783 }
784
785 err = gssx_enc_bool(xdr, arg->ret_deleg_cred);
786 if (err)
787 goto done;
788
789 /* leave options empty for now, will add once we have any options
790 * to pass up at all */
791 /* arg->options */
792 err = dummy_enc_opt_array(xdr, &arg->options);
793
794done:
795 if (err)
796 dprintk("RPC: gssx_enc_accept_sec_context: %d\n", err);
797}
798
799int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp,
800 struct xdr_stream *xdr,
801 struct gssx_res_accept_sec_context *res)
802{
803 int err;
804
805 /* res->status */
806 err = gssx_dec_status(xdr, &res->status);
807 if (err)
808 return err;
809
810 /* res->context_handle */
811 if (gssx_check_pointer(xdr)) {
812 err = gssx_dec_ctx(xdr, res->context_handle);
813 if (err)
814 return err;
815 } else {
816 res->context_handle = NULL;
817 }
818
819 /* res->output_token */
820 if (gssx_check_pointer(xdr)) {
821 err = gssx_dec_buffer(xdr, res->output_token);
822 if (err)
823 return err;
824 } else {
825 res->output_token = NULL;
826 }
827
828 /* res->delegated_cred_handle */
829 if (gssx_check_pointer(xdr)) {
830 /* we do not support upcall servers sending this data. */
831 return -EINVAL;
832 }
833
834 /* res->options */
835 err = gssx_dec_option_array(xdr, &res->options);
836
837 return err;
838}
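gssx_dec_linux_creds() above reads the CREDS_VALUE option as an XDR opaque whose payload is a packed run of raw 32-bit values: uid, gid, group count, then the groups. The sketch below (plain C, illustrative only and not gss-proxy source) builds a buffer in that layout; note the decoder copies the fields with memcpy() rather than converting from network byte order, so the values are assumed to be in the local daemon's native endianness.

#include <stdint.h>
#include <string.h>

/* Pack uid, gid and supplementary groups the way the decoder expects;
 * returns the payload length to advertise as the XDR opaque length. */
static size_t pack_linux_creds(int32_t uid, int32_t gid,
			       const int32_t *groups, int32_t ngroups,
			       char *out)
{
	size_t off = 0;

	memcpy(out + off, &uid, sizeof(uid));         off += sizeof(uid);
	memcpy(out + off, &gid, sizeof(gid));         off += sizeof(gid);
	memcpy(out + off, &ngroups, sizeof(ngroups)); off += sizeof(ngroups);
	memcpy(out + off, groups, sizeof(*groups) * (size_t)ngroups);
	off += sizeof(*groups) * (size_t)ngroups;
	return off;
}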
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.h b/net/sunrpc/auth_gss/gss_rpc_xdr.h
new file mode 100644
index 000000000000..1c98b27d870c
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.h
@@ -0,0 +1,264 @@
1/*
2 * GSS Proxy upcall module
3 *
4 * Copyright (C) 2012 Simo Sorce <simo@redhat.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 */
20
21#ifndef _LINUX_GSS_RPC_XDR_H
22#define _LINUX_GSS_RPC_XDR_H
23
24#include <linux/sunrpc/xdr.h>
25#include <linux/sunrpc/clnt.h>
26#include <linux/sunrpc/xprtsock.h>
27
28#ifdef RPC_DEBUG
29# define RPCDBG_FACILITY RPCDBG_AUTH
30#endif
31
32#define LUCID_OPTION "exported_context_type"
33#define LUCID_VALUE "linux_lucid_v1"
34#define CREDS_OPTION "exported_creds_type"
35#define CREDS_VALUE "linux_creds_v1"
36
37typedef struct xdr_netobj gssx_buffer;
38typedef struct xdr_netobj utf8string;
39typedef struct xdr_netobj gssx_OID;
40
41enum gssx_cred_usage {
42 GSSX_C_INITIATE = 1,
43 GSSX_C_ACCEPT = 2,
44 GSSX_C_BOTH = 3,
45};
46
47struct gssx_option {
48 gssx_buffer option;
49 gssx_buffer value;
50};
51
52struct gssx_option_array {
53 u32 count;
54 struct gssx_option *data;
55};
56
57struct gssx_status {
58 u64 major_status;
59 gssx_OID mech;
60 u64 minor_status;
61 utf8string major_status_string;
62 utf8string minor_status_string;
63 gssx_buffer server_ctx;
64 struct gssx_option_array options;
65};
66
67struct gssx_call_ctx {
68 utf8string locale;
69 gssx_buffer server_ctx;
70 struct gssx_option_array options;
71};
72
73struct gssx_name_attr {
74 gssx_buffer attr;
75 gssx_buffer value;
76 struct gssx_option_array extensions;
77};
78
79struct gssx_name_attr_array {
80 u32 count;
81 struct gssx_name_attr *data;
82};
83
84struct gssx_name {
85 gssx_buffer display_name;
86};
87typedef struct gssx_name gssx_name;
88
89struct gssx_cred_element {
90 gssx_name MN;
91 gssx_OID mech;
92 u32 cred_usage;
93 u64 initiator_time_rec;
94 u64 acceptor_time_rec;
95 struct gssx_option_array options;
96};
97
98struct gssx_cred_element_array {
99 u32 count;
100 struct gssx_cred_element *data;
101};
102
103struct gssx_cred {
104 gssx_name desired_name;
105 struct gssx_cred_element_array elements;
106 gssx_buffer cred_handle_reference;
107 u32 needs_release;
108};
109
110struct gssx_ctx {
111 gssx_buffer exported_context_token;
112 gssx_buffer state;
113 u32 need_release;
114 gssx_OID mech;
115 gssx_name src_name;
116 gssx_name targ_name;
117 u64 lifetime;
118 u64 ctx_flags;
119 u32 locally_initiated;
120 u32 open;
121 struct gssx_option_array options;
122};
123
124struct gssx_cb {
125 u64 initiator_addrtype;
126 gssx_buffer initiator_address;
127 u64 acceptor_addrtype;
128 gssx_buffer acceptor_address;
129 gssx_buffer application_data;
130};
131
132
133/* This structure is not defined in the protocol.
134 * It is used in the kernel to carry around a big buffer
135 * as a set of pages */
136struct gssp_in_token {
137 struct page **pages; /* Array of contiguous pages */
138 unsigned int page_base; /* Start of page data */
139 unsigned int page_len; /* Length of page data */
140};
141
142struct gssx_arg_accept_sec_context {
143 struct gssx_call_ctx call_ctx;
144 struct gssx_ctx *context_handle;
145 struct gssx_cred *cred_handle;
146 struct gssp_in_token input_token;
147 struct gssx_cb *input_cb;
148 u32 ret_deleg_cred;
149 struct gssx_option_array options;
150};
151
152struct gssx_res_accept_sec_context {
153 struct gssx_status status;
154 struct gssx_ctx *context_handle;
155 gssx_buffer *output_token;
156 /* struct gssx_cred *delegated_cred_handle; not used in kernel */
157 struct gssx_option_array options;
158};
159
160
161
162#define gssx_enc_indicate_mechs NULL
163#define gssx_dec_indicate_mechs NULL
164#define gssx_enc_get_call_context NULL
165#define gssx_dec_get_call_context NULL
166#define gssx_enc_import_and_canon_name NULL
167#define gssx_dec_import_and_canon_name NULL
168#define gssx_enc_export_cred NULL
169#define gssx_dec_export_cred NULL
170#define gssx_enc_import_cred NULL
171#define gssx_dec_import_cred NULL
172#define gssx_enc_acquire_cred NULL
173#define gssx_dec_acquire_cred NULL
174#define gssx_enc_store_cred NULL
175#define gssx_dec_store_cred NULL
176#define gssx_enc_init_sec_context NULL
177#define gssx_dec_init_sec_context NULL
178void gssx_enc_accept_sec_context(struct rpc_rqst *req,
179 struct xdr_stream *xdr,
180 struct gssx_arg_accept_sec_context *args);
181int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp,
182 struct xdr_stream *xdr,
183 struct gssx_res_accept_sec_context *res);
184#define gssx_enc_release_handle NULL
185#define gssx_dec_release_handle NULL
186#define gssx_enc_get_mic NULL
187#define gssx_dec_get_mic NULL
188#define gssx_enc_verify NULL
189#define gssx_dec_verify NULL
190#define gssx_enc_wrap NULL
191#define gssx_dec_wrap NULL
192#define gssx_enc_unwrap NULL
193#define gssx_dec_unwrap NULL
194#define gssx_enc_wrap_size_limit NULL
195#define gssx_dec_wrap_size_limit NULL
196
197/* non implemented calls are set to 0 size */
198#define GSSX_ARG_indicate_mechs_sz 0
199#define GSSX_RES_indicate_mechs_sz 0
200#define GSSX_ARG_get_call_context_sz 0
201#define GSSX_RES_get_call_context_sz 0
202#define GSSX_ARG_import_and_canon_name_sz 0
203#define GSSX_RES_import_and_canon_name_sz 0
204#define GSSX_ARG_export_cred_sz 0
205#define GSSX_RES_export_cred_sz 0
206#define GSSX_ARG_import_cred_sz 0
207#define GSSX_RES_import_cred_sz 0
208#define GSSX_ARG_acquire_cred_sz 0
209#define GSSX_RES_acquire_cred_sz 0
210#define GSSX_ARG_store_cred_sz 0
211#define GSSX_RES_store_cred_sz 0
212#define GSSX_ARG_init_sec_context_sz 0
213#define GSSX_RES_init_sec_context_sz 0
214
215#define GSSX_default_in_call_ctx_sz (4 + 4 + 4 + \
216 8 + sizeof(LUCID_OPTION) + sizeof(LUCID_VALUE) + \
217 8 + sizeof(CREDS_OPTION) + sizeof(CREDS_VALUE))
218#define GSSX_default_in_ctx_hndl_sz (4 + 4+8 + 4 + 4 + 6*4 + 6*4 + 8 + 8 + \
219 4 + 4 + 4)
220#define GSSX_default_in_cred_sz 4 /* we send in no cred_handle */
221#define GSSX_default_in_token_sz 4 /* does *not* include token data */
222#define GSSX_default_in_cb_sz 4 /* we do not use channel bindings */
223#define GSSX_ARG_accept_sec_context_sz (GSSX_default_in_call_ctx_sz + \
224 GSSX_default_in_ctx_hndl_sz + \
225 GSSX_default_in_cred_sz + \
226 GSSX_default_in_token_sz + \
227 GSSX_default_in_cb_sz + \
228 4 /* no deleg creds boolean */ + \
229 4) /* empty options */
230
231/* somewhat arbitrary numbers but large enough (we ignore some of the data
232 * sent down, but it is part of the protocol so we need enough space to take
233 * it in) */
234#define GSSX_default_status_sz 8 + 24 + 8 + 256 + 256 + 16 + 4
235#define GSSX_max_output_handle_sz 128
236#define GSSX_max_oid_sz 16
237#define GSSX_max_princ_sz 256
238#define GSSX_default_ctx_sz (GSSX_max_output_handle_sz + \
239 16 + 4 + GSSX_max_oid_sz + \
240 2 * GSSX_max_princ_sz + \
241 8 + 8 + 4 + 4 + 4)
242#define GSSX_max_output_token_sz 1024
243#define GSSX_max_creds_sz (4 + 4 + 4 + NGROUPS_MAX * 4)
244#define GSSX_RES_accept_sec_context_sz (GSSX_default_status_sz + \
245 GSSX_default_ctx_sz + \
246 GSSX_max_output_token_sz + \
247 4 + GSSX_max_creds_sz)
248
249#define GSSX_ARG_release_handle_sz 0
250#define GSSX_RES_release_handle_sz 0
251#define GSSX_ARG_get_mic_sz 0
252#define GSSX_RES_get_mic_sz 0
253#define GSSX_ARG_verify_sz 0
254#define GSSX_RES_verify_sz 0
255#define GSSX_ARG_wrap_sz 0
256#define GSSX_RES_wrap_sz 0
257#define GSSX_ARG_unwrap_sz 0
258#define GSSX_RES_unwrap_sz 0
259#define GSSX_ARG_wrap_size_limit_sz 0
260#define GSSX_RES_wrap_size_limit_sz 0
261
262
263
264#endif /* _LINUX_GSS_RPC_XDR_H */
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index c3ba570222dc..871c73c92165 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -48,8 +48,8 @@
48#include <linux/sunrpc/svcauth.h> 48#include <linux/sunrpc/svcauth.h>
49#include <linux/sunrpc/svcauth_gss.h> 49#include <linux/sunrpc/svcauth_gss.h>
50#include <linux/sunrpc/cache.h> 50#include <linux/sunrpc/cache.h>
51#include "gss_rpc_upcall.h"
51 52
52#include "../netns.h"
53 53
54#ifdef RPC_DEBUG 54#ifdef RPC_DEBUG
55# define RPCDBG_FACILITY RPCDBG_AUTH 55# define RPCDBG_FACILITY RPCDBG_AUTH
@@ -497,7 +497,8 @@ static int rsc_parse(struct cache_detail *cd,
497 len = qword_get(&mesg, buf, mlen); 497 len = qword_get(&mesg, buf, mlen);
498 if (len < 0) 498 if (len < 0)
499 goto out; 499 goto out;
500 status = gss_import_sec_context(buf, len, gm, &rsci.mechctx, GFP_KERNEL); 500 status = gss_import_sec_context(buf, len, gm, &rsci.mechctx,
501 NULL, GFP_KERNEL);
501 if (status) 502 if (status)
502 goto out; 503 goto out;
503 504
@@ -505,8 +506,10 @@ static int rsc_parse(struct cache_detail *cd,
505 len = qword_get(&mesg, buf, mlen); 506 len = qword_get(&mesg, buf, mlen);
506 if (len > 0) { 507 if (len > 0) {
507 rsci.cred.cr_principal = kstrdup(buf, GFP_KERNEL); 508 rsci.cred.cr_principal = kstrdup(buf, GFP_KERNEL);
508 if (!rsci.cred.cr_principal) 509 if (!rsci.cred.cr_principal) {
510 status = -ENOMEM;
509 goto out; 511 goto out;
512 }
510 } 513 }
511 514
512 } 515 }
@@ -987,13 +990,10 @@ gss_write_init_verf(struct cache_detail *cd, struct svc_rqst *rqstp,
987} 990}
988 991
989static inline int 992static inline int
990gss_read_verf(struct rpc_gss_wire_cred *gc, 993gss_read_common_verf(struct rpc_gss_wire_cred *gc,
991 struct kvec *argv, __be32 *authp, 994 struct kvec *argv, __be32 *authp,
992 struct xdr_netobj *in_handle, 995 struct xdr_netobj *in_handle)
993 struct xdr_netobj *in_token)
994{ 996{
995 struct xdr_netobj tmpobj;
996
997 /* Read the verifier; should be NULL: */ 997 /* Read the verifier; should be NULL: */
998 *authp = rpc_autherr_badverf; 998 *authp = rpc_autherr_badverf;
999 if (argv->iov_len < 2 * 4) 999 if (argv->iov_len < 2 * 4)
@@ -1009,6 +1009,23 @@ gss_read_verf(struct rpc_gss_wire_cred *gc,
1009 if (dup_netobj(in_handle, &gc->gc_ctx)) 1009 if (dup_netobj(in_handle, &gc->gc_ctx))
1010 return SVC_CLOSE; 1010 return SVC_CLOSE;
1011 *authp = rpc_autherr_badverf; 1011 *authp = rpc_autherr_badverf;
1012
1013 return 0;
1014}
1015
1016static inline int
1017gss_read_verf(struct rpc_gss_wire_cred *gc,
1018 struct kvec *argv, __be32 *authp,
1019 struct xdr_netobj *in_handle,
1020 struct xdr_netobj *in_token)
1021{
1022 struct xdr_netobj tmpobj;
1023 int res;
1024
1025 res = gss_read_common_verf(gc, argv, authp, in_handle);
1026 if (res)
1027 return res;
1028
1012 if (svc_safe_getnetobj(argv, &tmpobj)) { 1029 if (svc_safe_getnetobj(argv, &tmpobj)) {
1013 kfree(in_handle->data); 1030 kfree(in_handle->data);
1014 return SVC_DENIED; 1031 return SVC_DENIED;
@@ -1021,6 +1038,40 @@ gss_read_verf(struct rpc_gss_wire_cred *gc,
1021 return 0; 1038 return 0;
1022} 1039}
1023 1040
1041/* Ok, this depends heavily on a set of semantics in how rqstp is
1042 * set up by svc_recv and how pages are laid down by the server when
1043 * reading a request. We are basically guaranteed that the token is
1044 * laid out linearly across a set of pages, starting at iov_base in
1045 * rq_arg.head[0], which happens to be the first of a set of pages
1046 * stored in rq_pages[].
1047 * rq_arg.head[0].iov_base provides the page_base to pass to the
1048 * upcall.
1049 */
1050static inline int
1051gss_read_proxy_verf(struct svc_rqst *rqstp,
1052 struct rpc_gss_wire_cred *gc, __be32 *authp,
1053 struct xdr_netobj *in_handle,
1054 struct gssp_in_token *in_token)
1055{
1056 struct kvec *argv = &rqstp->rq_arg.head[0];
1057 u32 inlen;
1058 int res;
1059
1060 res = gss_read_common_verf(gc, argv, authp, in_handle);
1061 if (res)
1062 return res;
1063
1064 inlen = svc_getnl(argv);
1065 if (inlen > (argv->iov_len + rqstp->rq_arg.page_len))
1066 return SVC_DENIED;
1067
1068 in_token->pages = rqstp->rq_pages;
1069 in_token->page_base = (ulong)argv->iov_base & ~PAGE_MASK;
1070 in_token->page_len = inlen;
1071
1072 return 0;
1073}
1074
1024static inline int 1075static inline int
1025gss_write_resv(struct kvec *resv, size_t size_limit, 1076gss_write_resv(struct kvec *resv, size_t size_limit,
1026 struct xdr_netobj *out_handle, struct xdr_netobj *out_token, 1077 struct xdr_netobj *out_handle, struct xdr_netobj *out_token,
@@ -1048,7 +1099,7 @@ gss_write_resv(struct kvec *resv, size_t size_limit,
1048 * the upcall results are available, write the verifier and result. 1099 * the upcall results are available, write the verifier and result.
1049 * Otherwise, drop the request pending an answer to the upcall. 1100 * Otherwise, drop the request pending an answer to the upcall.
1050 */ 1101 */
1051static int svcauth_gss_handle_init(struct svc_rqst *rqstp, 1102static int svcauth_gss_legacy_init(struct svc_rqst *rqstp,
1052 struct rpc_gss_wire_cred *gc, __be32 *authp) 1103 struct rpc_gss_wire_cred *gc, __be32 *authp)
1053{ 1104{
1054 struct kvec *argv = &rqstp->rq_arg.head[0]; 1105 struct kvec *argv = &rqstp->rq_arg.head[0];
@@ -1088,6 +1139,287 @@ out:
1088 return ret; 1139 return ret;
1089} 1140}
1090 1141
1142static int gss_proxy_save_rsc(struct cache_detail *cd,
1143 struct gssp_upcall_data *ud,
1144 uint64_t *handle)
1145{
1146 struct rsc rsci, *rscp = NULL;
1147 static atomic64_t ctxhctr;
1148 long long ctxh;
1149 struct gss_api_mech *gm = NULL;
1150 time_t expiry;
1151 int status = -EINVAL;
1152
1153 memset(&rsci, 0, sizeof(rsci));
1154 /* context handle */
1155 status = -ENOMEM;
1156 /* the handle needs to be just a unique id,
1157 * use a static counter */
1158 ctxh = atomic64_inc_return(&ctxhctr);
1159
1160 /* make a copy for the caller */
1161 *handle = ctxh;
1162
1163 /* make a copy for the rsc cache */
1164 if (dup_to_netobj(&rsci.handle, (char *)handle, sizeof(uint64_t)))
1165 goto out;
1166 rscp = rsc_lookup(cd, &rsci);
1167 if (!rscp)
1168 goto out;
1169
1170 /* creds */
1171 if (!ud->found_creds) {
1172		/* userspace seems buggy; we should always get at least a
1173		 * mapping to nobody */
1174 dprintk("RPC: No creds found, marking Negative!\n");
1175 set_bit(CACHE_NEGATIVE, &rsci.h.flags);
1176 } else {
1177
1178 /* steal creds */
1179 rsci.cred = ud->creds;
1180 memset(&ud->creds, 0, sizeof(struct svc_cred));
1181
1182 status = -EOPNOTSUPP;
1183 /* get mech handle from OID */
1184 gm = gss_mech_get_by_OID(&ud->mech_oid);
1185 if (!gm)
1186 goto out;
1187
1188 status = -EINVAL;
1189 /* mech-specific data: */
1190 status = gss_import_sec_context(ud->out_handle.data,
1191 ud->out_handle.len,
1192 gm, &rsci.mechctx,
1193 &expiry, GFP_KERNEL);
1194 if (status)
1195 goto out;
1196 }
1197
1198 rsci.h.expiry_time = expiry;
1199 rscp = rsc_update(cd, &rsci, rscp);
1200 status = 0;
1201out:
1202 gss_mech_put(gm);
1203 rsc_free(&rsci);
1204 if (rscp)
1205 cache_put(&rscp->h, cd);
1206 else
1207 status = -ENOMEM;
1208 return status;
1209}
1210
1211static int svcauth_gss_proxy_init(struct svc_rqst *rqstp,
1212 struct rpc_gss_wire_cred *gc, __be32 *authp)
1213{
1214 struct kvec *resv = &rqstp->rq_res.head[0];
1215 struct xdr_netobj cli_handle;
1216 struct gssp_upcall_data ud;
1217 uint64_t handle;
1218 int status;
1219 int ret;
1220 struct net *net = rqstp->rq_xprt->xpt_net;
1221 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
1222
1223 memset(&ud, 0, sizeof(ud));
1224 ret = gss_read_proxy_verf(rqstp, gc, authp,
1225 &ud.in_handle, &ud.in_token);
1226 if (ret)
1227 return ret;
1228
1229 ret = SVC_CLOSE;
1230
1231 /* Perform synchronous upcall to gss-proxy */
1232 status = gssp_accept_sec_context_upcall(net, &ud);
1233 if (status)
1234 goto out;
1235
1236 dprintk("RPC: svcauth_gss: gss major status = %d\n",
1237 ud.major_status);
1238
1239 switch (ud.major_status) {
1240 case GSS_S_CONTINUE_NEEDED:
1241 cli_handle = ud.out_handle;
1242 break;
1243 case GSS_S_COMPLETE:
1244 status = gss_proxy_save_rsc(sn->rsc_cache, &ud, &handle);
1245 if (status)
1246 goto out;
1247 cli_handle.data = (u8 *)&handle;
1248 cli_handle.len = sizeof(handle);
1249 break;
1250 default:
1251 ret = SVC_CLOSE;
1252 goto out;
1253 }
1254
1255 /* Got an answer to the upcall; use it: */
1256 if (gss_write_init_verf(sn->rsc_cache, rqstp,
1257 &cli_handle, &ud.major_status))
1258 goto out;
1259 if (gss_write_resv(resv, PAGE_SIZE,
1260 &cli_handle, &ud.out_token,
1261 ud.major_status, ud.minor_status))
1262 goto out;
1263
1264 ret = SVC_COMPLETE;
1265out:
1266 gssp_free_upcall_data(&ud);
1267 return ret;
1268}
1269
1270DEFINE_SPINLOCK(use_gssp_lock);
1271
1272static bool use_gss_proxy(struct net *net)
1273{
1274 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
1275
1276 if (sn->use_gss_proxy != -1)
1277 return sn->use_gss_proxy;
1278 spin_lock(&use_gssp_lock);
1279 /*
1280 * If you wanted gss-proxy, you should have said so before
1281 * starting to accept requests:
1282 */
1283 sn->use_gss_proxy = 0;
1284 spin_unlock(&use_gssp_lock);
1285 return 0;
1286}
1287
1288#ifdef CONFIG_PROC_FS
1289
1290static int set_gss_proxy(struct net *net, int type)
1291{
1292 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
1293 int ret = 0;
1294
1295 WARN_ON_ONCE(type != 0 && type != 1);
1296 spin_lock(&use_gssp_lock);
1297 if (sn->use_gss_proxy == -1 || sn->use_gss_proxy == type)
1298 sn->use_gss_proxy = type;
1299 else
1300 ret = -EBUSY;
1301 spin_unlock(&use_gssp_lock);
1302 wake_up(&sn->gssp_wq);
1303 return ret;
1304}
1305
1306static inline bool gssp_ready(struct sunrpc_net *sn)
1307{
1308 switch (sn->use_gss_proxy) {
1309 case -1:
1310 return false;
1311 case 0:
1312 return true;
1313 case 1:
1314 return sn->gssp_clnt;
1315 }
1316 WARN_ON_ONCE(1);
1317 return false;
1318}
1319
1320static int wait_for_gss_proxy(struct net *net)
1321{
1322 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
1323
1324 return wait_event_interruptible(sn->gssp_wq, gssp_ready(sn));
1325}
1326
1327
1328static ssize_t write_gssp(struct file *file, const char __user *buf,
1329 size_t count, loff_t *ppos)
1330{
1331 struct net *net = PDE_DATA(file->f_path.dentry->d_inode);
1332 char tbuf[20];
1333 unsigned long i;
1334 int res;
1335
1336 if (*ppos || count > sizeof(tbuf)-1)
1337 return -EINVAL;
1338 if (copy_from_user(tbuf, buf, count))
1339 return -EFAULT;
1340
1341 tbuf[count] = 0;
1342 res = kstrtoul(tbuf, 0, &i);
1343 if (res)
1344 return res;
1345 if (i != 1)
1346 return -EINVAL;
1347 res = set_gss_proxy(net, 1);
1348 if (res)
1349 return res;
1350 res = set_gssp_clnt(net);
1351 if (res)
1352 return res;
1353 return count;
1354}
1355
1356static ssize_t read_gssp(struct file *file, char __user *buf,
1357 size_t count, loff_t *ppos)
1358{
1359 struct net *net = PDE_DATA(file->f_path.dentry->d_inode);
1360 unsigned long p = *ppos;
1361 char tbuf[10];
1362 size_t len;
1363 int ret;
1364
1365 ret = wait_for_gss_proxy(net);
1366 if (ret)
1367 return ret;
1368
1369 snprintf(tbuf, sizeof(tbuf), "%d\n", use_gss_proxy(net));
1370 len = strlen(tbuf);
1371 if (p >= len)
1372 return 0;
1373 len -= p;
1374 if (len > count)
1375 len = count;
1376 if (copy_to_user(buf, (void *)(tbuf+p), len))
1377 return -EFAULT;
1378 *ppos += len;
1379 return len;
1380}
1381
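read_gssp() renders the current setting into a small buffer and serves the read from offset *ppos, clamping the length and advancing the offset, which is the usual pattern for tiny procfs files. The same logic in plain C, with memcpy standing in for copy_to_user (illustrative only):

    #include <stddef.h>
    #include <stdio.h>
    #include <string.h>

    /* Copy up to count bytes of the rendered value, starting at *ppos. */
    static size_t read_setting(int value, char *buf, size_t count, size_t *ppos)
    {
        char tbuf[10];
        size_t len;

        snprintf(tbuf, sizeof(tbuf), "%d\n", value);
        len = strlen(tbuf);
        if (*ppos >= len)
            return 0;              /* past the end: EOF */
        len -= *ppos;
        if (len > count)
            len = count;
        memcpy(buf, tbuf + *ppos, len);
        *ppos += len;
        return len;
    }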
1382static const struct file_operations use_gss_proxy_ops = {
1383 .open = nonseekable_open,
1384 .write = write_gssp,
1385 .read = read_gssp,
1386};
1387
1388static int create_use_gss_proxy_proc_entry(struct net *net)
1389{
1390 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
1391 struct proc_dir_entry **p = &sn->use_gssp_proc;
1392
1393 sn->use_gss_proxy = -1;
1394 *p = proc_create_data("use-gss-proxy", S_IFREG|S_IRUSR|S_IWUSR,
1395 sn->proc_net_rpc,
1396 &use_gss_proxy_ops, net);
1397 if (!*p)
1398 return -ENOMEM;
1399 init_gssp_clnt(sn);
1400 return 0;
1401}
1402
1403static void destroy_use_gss_proxy_proc_entry(struct net *net)
1404{
1405 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
1406
1407 if (sn->use_gssp_proc) {
1408 remove_proc_entry("use-gss-proxy", sn->proc_net_rpc);
1409 clear_gssp_clnt(sn);
1410 }
1411}
1412#else /* CONFIG_PROC_FS */
1413
1414static int create_use_gss_proxy_proc_entry(struct net *net)
1415{
1416 return 0;
1417}
1418
1419static void destroy_use_gss_proxy_proc_entry(struct net *net) {}
1420
1421#endif /* CONFIG_PROC_FS */
1422
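create_use_gss_proxy_proc_entry() hands the struct net to proc_create_data() as private data, and the read/write handlers recover it with PDE_DATA(); the empty !CONFIG_PROC_FS stubs keep the callers unconditional. A stripped-down kernel-style sketch of that pairing (a hypothetical "demo" entry, not the file_operations used above):

    #include <linux/fs.h>
    #include <linux/proc_fs.h>
    #include <net/net_namespace.h>

    /* Hypothetical handler: recover the per-net pointer stored at create time. */
    static ssize_t demo_read(struct file *file, char __user *buf,
                             size_t count, loff_t *ppos)
    {
        struct net *net = PDE_DATA(file_inode(file));

        (void)net;      /* format per-net state and copy_to_user() here */
        return 0;
    }

    static const struct file_operations demo_ops = {
        .open = nonseekable_open,
        .read = demo_read,
    };

    static int demo_create(struct net *net, struct proc_dir_entry *parent)
    {
        if (!proc_create_data("demo", S_IRUSR, parent, &demo_ops, net))
            return -ENOMEM;
        return 0;
    }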
1091/* 1423/*
1092 * Accept an rpcsec packet. 1424 * Accept an rpcsec packet.
1093 * If context establishment, punt to user space 1425 * If context establishment, punt to user space
@@ -1154,7 +1486,10 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
1154 switch (gc->gc_proc) { 1486 switch (gc->gc_proc) {
1155 case RPC_GSS_PROC_INIT: 1487 case RPC_GSS_PROC_INIT:
1156 case RPC_GSS_PROC_CONTINUE_INIT: 1488 case RPC_GSS_PROC_CONTINUE_INIT:
1157 return svcauth_gss_handle_init(rqstp, gc, authp); 1489 if (use_gss_proxy(SVC_NET(rqstp)))
1490 return svcauth_gss_proxy_init(rqstp, gc, authp);
1491 else
1492 return svcauth_gss_legacy_init(rqstp, gc, authp);
1158 case RPC_GSS_PROC_DATA: 1493 case RPC_GSS_PROC_DATA:
1159 case RPC_GSS_PROC_DESTROY: 1494 case RPC_GSS_PROC_DESTROY:
1160 /* Look up the context, and check the verifier: */ 1495 /* Look up the context, and check the verifier: */
@@ -1531,7 +1866,12 @@ gss_svc_init_net(struct net *net)
1531 rv = rsi_cache_create_net(net); 1866 rv = rsi_cache_create_net(net);
1532 if (rv) 1867 if (rv)
1533 goto out1; 1868 goto out1;
1869 rv = create_use_gss_proxy_proc_entry(net);
1870 if (rv)
1871 goto out2;
1534 return 0; 1872 return 0;
1873out2:
1874 destroy_use_gss_proxy_proc_entry(net);
1535out1: 1875out1:
1536 rsc_cache_destroy_net(net); 1876 rsc_cache_destroy_net(net);
1537 return rv; 1877 return rv;
@@ -1540,6 +1880,7 @@ out1:
1540void 1880void
1541gss_svc_shutdown_net(struct net *net) 1881gss_svc_shutdown_net(struct net *net)
1542{ 1882{
1883 destroy_use_gss_proxy_proc_entry(net);
1543 rsi_cache_destroy_net(net); 1884 rsi_cache_destroy_net(net);
1544 rsc_cache_destroy_net(net); 1885 rsc_cache_destroy_net(net);
1545} 1886}
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index ce2d180d05a4..80fe5c86efd1 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -986,8 +986,10 @@ static int cache_open(struct inode *inode, struct file *filp,
986 nonseekable_open(inode, filp); 986 nonseekable_open(inode, filp);
987 if (filp->f_mode & FMODE_READ) { 987 if (filp->f_mode & FMODE_READ) {
988 rp = kmalloc(sizeof(*rp), GFP_KERNEL); 988 rp = kmalloc(sizeof(*rp), GFP_KERNEL);
989 if (!rp) 989 if (!rp) {
990 module_put(cd->owner);
990 return -ENOMEM; 991 return -ENOMEM;
992 }
991 rp->offset = 0; 993 rp->offset = 0;
992 rp->q.reader = 1; 994 rp->q.reader = 1;
993 atomic_inc(&cd->readers); 995 atomic_inc(&cd->readers);
@@ -1460,7 +1462,7 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
1460static ssize_t cache_read_procfs(struct file *filp, char __user *buf, 1462static ssize_t cache_read_procfs(struct file *filp, char __user *buf,
1461 size_t count, loff_t *ppos) 1463 size_t count, loff_t *ppos)
1462{ 1464{
1463 struct cache_detail *cd = PDE(file_inode(filp))->data; 1465 struct cache_detail *cd = PDE_DATA(file_inode(filp));
1464 1466
1465 return cache_read(filp, buf, count, ppos, cd); 1467 return cache_read(filp, buf, count, ppos, cd);
1466} 1468}
@@ -1468,14 +1470,14 @@ static ssize_t cache_read_procfs(struct file *filp, char __user *buf,
1468static ssize_t cache_write_procfs(struct file *filp, const char __user *buf, 1470static ssize_t cache_write_procfs(struct file *filp, const char __user *buf,
1469 size_t count, loff_t *ppos) 1471 size_t count, loff_t *ppos)
1470{ 1472{
1471 struct cache_detail *cd = PDE(file_inode(filp))->data; 1473 struct cache_detail *cd = PDE_DATA(file_inode(filp));
1472 1474
1473 return cache_write(filp, buf, count, ppos, cd); 1475 return cache_write(filp, buf, count, ppos, cd);
1474} 1476}
1475 1477
1476static unsigned int cache_poll_procfs(struct file *filp, poll_table *wait) 1478static unsigned int cache_poll_procfs(struct file *filp, poll_table *wait)
1477{ 1479{
1478 struct cache_detail *cd = PDE(file_inode(filp))->data; 1480 struct cache_detail *cd = PDE_DATA(file_inode(filp));
1479 1481
1480 return cache_poll(filp, wait, cd); 1482 return cache_poll(filp, wait, cd);
1481} 1483}
@@ -1484,21 +1486,21 @@ static long cache_ioctl_procfs(struct file *filp,
1484 unsigned int cmd, unsigned long arg) 1486 unsigned int cmd, unsigned long arg)
1485{ 1487{
1486 struct inode *inode = file_inode(filp); 1488 struct inode *inode = file_inode(filp);
1487 struct cache_detail *cd = PDE(inode)->data; 1489 struct cache_detail *cd = PDE_DATA(inode);
1488 1490
1489 return cache_ioctl(inode, filp, cmd, arg, cd); 1491 return cache_ioctl(inode, filp, cmd, arg, cd);
1490} 1492}
1491 1493
1492static int cache_open_procfs(struct inode *inode, struct file *filp) 1494static int cache_open_procfs(struct inode *inode, struct file *filp)
1493{ 1495{
1494 struct cache_detail *cd = PDE(inode)->data; 1496 struct cache_detail *cd = PDE_DATA(inode);
1495 1497
1496 return cache_open(inode, filp, cd); 1498 return cache_open(inode, filp, cd);
1497} 1499}
1498 1500
1499static int cache_release_procfs(struct inode *inode, struct file *filp) 1501static int cache_release_procfs(struct inode *inode, struct file *filp)
1500{ 1502{
1501 struct cache_detail *cd = PDE(inode)->data; 1503 struct cache_detail *cd = PDE_DATA(inode);
1502 1504
1503 return cache_release(inode, filp, cd); 1505 return cache_release(inode, filp, cd);
1504} 1506}
@@ -1516,14 +1518,14 @@ static const struct file_operations cache_file_operations_procfs = {
1516 1518
1517static int content_open_procfs(struct inode *inode, struct file *filp) 1519static int content_open_procfs(struct inode *inode, struct file *filp)
1518{ 1520{
1519 struct cache_detail *cd = PDE(inode)->data; 1521 struct cache_detail *cd = PDE_DATA(inode);
1520 1522
1521 return content_open(inode, filp, cd); 1523 return content_open(inode, filp, cd);
1522} 1524}
1523 1525
1524static int content_release_procfs(struct inode *inode, struct file *filp) 1526static int content_release_procfs(struct inode *inode, struct file *filp)
1525{ 1527{
1526 struct cache_detail *cd = PDE(inode)->data; 1528 struct cache_detail *cd = PDE_DATA(inode);
1527 1529
1528 return content_release(inode, filp, cd); 1530 return content_release(inode, filp, cd);
1529} 1531}
@@ -1537,14 +1539,14 @@ static const struct file_operations content_file_operations_procfs = {
1537 1539
1538static int open_flush_procfs(struct inode *inode, struct file *filp) 1540static int open_flush_procfs(struct inode *inode, struct file *filp)
1539{ 1541{
1540 struct cache_detail *cd = PDE(inode)->data; 1542 struct cache_detail *cd = PDE_DATA(inode);
1541 1543
1542 return open_flush(inode, filp, cd); 1544 return open_flush(inode, filp, cd);
1543} 1545}
1544 1546
1545static int release_flush_procfs(struct inode *inode, struct file *filp) 1547static int release_flush_procfs(struct inode *inode, struct file *filp)
1546{ 1548{
1547 struct cache_detail *cd = PDE(inode)->data; 1549 struct cache_detail *cd = PDE_DATA(inode);
1548 1550
1549 return release_flush(inode, filp, cd); 1551 return release_flush(inode, filp, cd);
1550} 1552}
@@ -1552,7 +1554,7 @@ static int release_flush_procfs(struct inode *inode, struct file *filp)
1552static ssize_t read_flush_procfs(struct file *filp, char __user *buf, 1554static ssize_t read_flush_procfs(struct file *filp, char __user *buf,
1553 size_t count, loff_t *ppos) 1555 size_t count, loff_t *ppos)
1554{ 1556{
1555 struct cache_detail *cd = PDE(file_inode(filp))->data; 1557 struct cache_detail *cd = PDE_DATA(file_inode(filp));
1556 1558
1557 return read_flush(filp, buf, count, ppos, cd); 1559 return read_flush(filp, buf, count, ppos, cd);
1558} 1560}
@@ -1561,7 +1563,7 @@ static ssize_t write_flush_procfs(struct file *filp,
1561 const char __user *buf, 1563 const char __user *buf,
1562 size_t count, loff_t *ppos) 1564 size_t count, loff_t *ppos)
1563{ 1565{
1564 struct cache_detail *cd = PDE(file_inode(filp))->data; 1566 struct cache_detail *cd = PDE_DATA(file_inode(filp));
1565 1567
1566 return write_flush(filp, buf, count, ppos, cd); 1568 return write_flush(filp, buf, count, ppos, cd);
1567} 1569}
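Two things change in cache.c: cache_open() stops leaking the module reference it took when the kmalloc fails, and every procfs handler switches from reaching into PDE(inode)->data to the PDE_DATA(inode) accessor. The leak fix follows the general unwind rule sketched below (a generic example with an invented refcounted resource, not the sunrpc code):

    #include <errno.h>
    #include <stdlib.h>

    struct resource { int refcount; };    /* stand-in for the module refcount */

    static void resource_get(struct resource *r) { r->refcount++; }
    static void resource_put(struct resource *r) { r->refcount--; }

    static int open_reader(struct resource *r, void **out)
    {
        resource_get(r);              /* reference taken before the allocation */

        *out = malloc(64);
        if (!*out) {
            resource_put(r);          /* must be dropped on the failure path */
            return -ENOMEM;
        }
        return 0;
    }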
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index d259fa966927..3f7930f938cc 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -413,6 +413,8 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
413 413
414 if (args->flags & RPC_CLNT_CREATE_INFINITE_SLOTS) 414 if (args->flags & RPC_CLNT_CREATE_INFINITE_SLOTS)
415 xprtargs.flags |= XPRT_CREATE_INFINITE_SLOTS; 415 xprtargs.flags |= XPRT_CREATE_INFINITE_SLOTS;
416 if (args->flags & RPC_CLNT_CREATE_NO_IDLE_TIMEOUT)
417 xprtargs.flags |= XPRT_CREATE_NO_IDLE_TIMEOUT;
416 /* 418 /*
417 * If the caller chooses not to specify a hostname, whip 419 * If the caller chooses not to specify a hostname, whip
418 * up a string representation of the passed-in address. 420 * up a string representation of the passed-in address.
@@ -681,6 +683,7 @@ rpc_release_client(struct rpc_clnt *clnt)
681 if (atomic_dec_and_test(&clnt->cl_count)) 683 if (atomic_dec_and_test(&clnt->cl_count))
682 rpc_free_auth(clnt); 684 rpc_free_auth(clnt);
683} 685}
686EXPORT_SYMBOL_GPL(rpc_release_client);
684 687
685/** 688/**
686 * rpc_bind_new_program - bind a new RPC program to an existing client 689 * rpc_bind_new_program - bind a new RPC program to an existing client
diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h
index ce7bd449173d..7111a4c9113b 100644
--- a/net/sunrpc/netns.h
+++ b/net/sunrpc/netns.h
@@ -23,6 +23,12 @@ struct sunrpc_net {
23 struct rpc_clnt *rpcb_local_clnt4; 23 struct rpc_clnt *rpcb_local_clnt4;
24 spinlock_t rpcb_clnt_lock; 24 spinlock_t rpcb_clnt_lock;
25 unsigned int rpcb_users; 25 unsigned int rpcb_users;
26
27 struct mutex gssp_lock;
28 wait_queue_head_t gssp_wq;
29 struct rpc_clnt *gssp_clnt;
30 int use_gss_proxy;
31 struct proc_dir_entry *use_gssp_proc;
26}; 32};
27 33
28extern int sunrpc_net_id; 34extern int sunrpc_net_id;
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index bc2068ee795b..21b75cb08c03 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -64,7 +64,7 @@ static int rpc_proc_show(struct seq_file *seq, void *v) {
64 64
65static int rpc_proc_open(struct inode *inode, struct file *file) 65static int rpc_proc_open(struct inode *inode, struct file *file)
66{ 66{
67 return single_open(file, rpc_proc_show, PDE(inode)->data); 67 return single_open(file, rpc_proc_show, PDE_DATA(inode));
68} 68}
69 69
70static const struct file_operations rpc_proc_fops = { 70static const struct file_operations rpc_proc_fops = {
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 745fca3cfd36..095363eee764 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1300,6 +1300,8 @@ found:
1300 -PTR_ERR(xprt)); 1300 -PTR_ERR(xprt));
1301 goto out; 1301 goto out;
1302 } 1302 }
1303 if (args->flags & XPRT_CREATE_NO_IDLE_TIMEOUT)
1304 xprt->idle_timeout = 0;
1303 INIT_WORK(&xprt->task_cleanup, xprt_autoclose); 1305 INIT_WORK(&xprt->task_cleanup, xprt_autoclose);
1304 if (xprt_has_timer(xprt)) 1306 if (xprt_has_timer(xprt))
1305 setup_timer(&xprt->timer, xprt_init_autodisconnect, 1307 setup_timer(&xprt->timer, xprt_init_autodisconnect,
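The clnt.c and xprt.c hunks cooperate: RPC_CLNT_CREATE_NO_IDLE_TIMEOUT on the rpc_create args is translated into XPRT_CREATE_NO_IDLE_TIMEOUT, and the transport-creation code reacts by zeroing idle_timeout so the connection is never autodisconnected. A sketch of that flag translation, with made-up bit values:

    #include <stdint.h>

    #define CLNT_FLAG_NO_IDLE_TIMEOUT  (1u << 0)   /* caller-facing flag (example value) */
    #define XPRT_FLAG_NO_IDLE_TIMEOUT  (1u << 0)   /* transport-facing flag (example value) */

    struct xprt_args { uint32_t flags; unsigned long idle_timeout; };

    static void setup_transport(struct xprt_args *xa, uint32_t clnt_flags,
                                unsigned long default_idle)
    {
        if (clnt_flags & CLNT_FLAG_NO_IDLE_TIMEOUT)
            xa->flags |= XPRT_FLAG_NO_IDLE_TIMEOUT;

        xa->idle_timeout = (xa->flags & XPRT_FLAG_NO_IDLE_TIMEOUT)
                            ? 0 : default_idle;    /* 0 = never autoclose */
    }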
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 9c2825827dec..ffd50348a509 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2655,6 +2655,9 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
2655 } 2655 }
2656 xprt_set_bound(xprt); 2656 xprt_set_bound(xprt);
2657 xs_format_peer_addresses(xprt, "local", RPCBIND_NETID_LOCAL); 2657 xs_format_peer_addresses(xprt, "local", RPCBIND_NETID_LOCAL);
2658 ret = ERR_PTR(xs_local_setup_socket(transport));
2659 if (ret)
2660 goto out_err;
2658 break; 2661 break;
2659 default: 2662 default:
2660 ret = ERR_PTR(-EAFNOSUPPORT); 2663 ret = ERR_PTR(-EAFNOSUPPORT);
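xs_setup_local() now calls xs_local_setup_socket() during transport creation, so a bad AF_LOCAL path makes rpc_create() fail immediately instead of surfacing at the first RPC. A small plain-C sketch of that fail-fast-construction idea (names are illustrative):

    #include <errno.h>
    #include <stdlib.h>

    struct transport { int connected; };

    static int connect_now(struct transport *t) { t->connected = 1; return 0; }

    /* Connect during setup so errors are reported to the creator, not a later user. */
    static struct transport *create_local_transport(int *err)
    {
        struct transport *t = calloc(1, sizeof(*t));

        if (!t) {
            *err = -ENOMEM;
            return NULL;
        }
        *err = connect_now(t);
        if (*err) {
            free(t);
            return NULL;          /* caller sees the failure at create time */
        }
        return t;
    }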
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 25e159c2feb4..e5f3da507823 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -584,8 +584,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
584{ 584{
585 int bp_index; 585 int bp_index;
586 586
587 /* 587 /* Prepare broadcast link message for reliable transmission,
588 * Prepare broadcast link message for reliable transmission,
589 * if first time trying to send it; 588 * if first time trying to send it;
590 * preparation is skipped for broadcast link protocol messages 589 * preparation is skipped for broadcast link protocol messages
591 * since they are sent in an unreliable manner and don't need it 590 * since they are sent in an unreliable manner and don't need it
@@ -611,30 +610,43 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
611 for (bp_index = 0; bp_index < MAX_BEARERS; bp_index++) { 610 for (bp_index = 0; bp_index < MAX_BEARERS; bp_index++) {
612 struct tipc_bearer *p = bcbearer->bpairs[bp_index].primary; 611 struct tipc_bearer *p = bcbearer->bpairs[bp_index].primary;
613 struct tipc_bearer *s = bcbearer->bpairs[bp_index].secondary; 612 struct tipc_bearer *s = bcbearer->bpairs[bp_index].secondary;
613 struct tipc_bearer *b = p;
614 struct sk_buff *tbuf;
614 615
615 if (!p) 616 if (!p)
616 break; /* no more bearers to try */ 617 break; /* No more bearers to try */
618
619 if (tipc_bearer_blocked(p)) {
620 if (!s || tipc_bearer_blocked(s))
621 continue; /* Can't use either bearer */
622 b = s;
623 }
617 624
618 tipc_nmap_diff(&bcbearer->remains, &p->nodes, &bcbearer->remains_new); 625 tipc_nmap_diff(&bcbearer->remains, &b->nodes,
626 &bcbearer->remains_new);
619 if (bcbearer->remains_new.count == bcbearer->remains.count) 627 if (bcbearer->remains_new.count == bcbearer->remains.count)
620 continue; /* bearer pair doesn't add anything */ 628 continue; /* Nothing added by bearer pair */
621 629
622 if (!tipc_bearer_blocked(p)) 630 if (bp_index == 0) {
623 tipc_bearer_send(p, buf, &p->bcast_addr); 631 /* Use original buffer for first bearer */
624 else if (s && !tipc_bearer_blocked(s)) 632 tipc_bearer_send(b, buf, &b->bcast_addr);
625 /* unable to send on primary bearer */ 633 } else {
626 tipc_bearer_send(s, buf, &s->bcast_addr); 634 /* Avoid concurrent buffer access */
627 else 635 tbuf = pskb_copy(buf, GFP_ATOMIC);
628 /* unable to send on either bearer */ 636 if (!tbuf)
629 continue; 637 break;
638 tipc_bearer_send(b, tbuf, &b->bcast_addr);
639 kfree_skb(tbuf); /* Bearer keeps a clone */
640 }
630 641
642 /* Swap bearers for next packet */
631 if (s) { 643 if (s) {
632 bcbearer->bpairs[bp_index].primary = s; 644 bcbearer->bpairs[bp_index].primary = s;
633 bcbearer->bpairs[bp_index].secondary = p; 645 bcbearer->bpairs[bp_index].secondary = p;
634 } 646 }
635 647
636 if (bcbearer->remains_new.count == 0) 648 if (bcbearer->remains_new.count == 0)
637 break; /* all targets reached */ 649 break; /* All targets reached */
638 650
639 bcbearer->remains = bcbearer->remains_new; 651 bcbearer->remains = bcbearer->remains_new;
640 } 652 }
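The reworked loop in tipc_bcbearer_send() picks the first unblocked bearer of each pair, sends the original buffer only on the first pair, and gives every later bearer its own pskb_copy() so two bearers never touch the same buffer concurrently. A userspace sketch of that copy-per-extra-sender rule (send_on_bearer() is a stand-in for tipc_bearer_send()):

    #include <stdlib.h>
    #include <string.h>

    struct buf { size_t len; unsigned char *data; };

    static void send_on_bearer(int bearer, const struct buf *b) { (void)bearer; (void)b; }

    static int broadcast(const struct buf *orig, int nbearers)
    {
        for (int i = 0; i < nbearers; i++) {
            if (i == 0) {
                send_on_bearer(i, orig);        /* first sender uses the original */
                continue;
            }
            struct buf copy = { orig->len, malloc(orig->len) };
            if (!copy.data)
                return -1;                      /* stop on allocation failure, like the break above */
            memcpy(copy.data, orig->data, orig->len);
            send_on_bearer(i, &copy);           /* private copy per extra sender */
            free(copy.data);
        }
        return 0;
    }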
diff --git a/net/tipc/link.c b/net/tipc/link.c
index daa6080a2a0c..a80feee5197a 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -2306,8 +2306,11 @@ static int link_recv_changeover_msg(struct tipc_link **l_ptr,
2306 struct tipc_msg *tunnel_msg = buf_msg(tunnel_buf); 2306 struct tipc_msg *tunnel_msg = buf_msg(tunnel_buf);
2307 u32 msg_typ = msg_type(tunnel_msg); 2307 u32 msg_typ = msg_type(tunnel_msg);
2308 u32 msg_count = msg_msgcnt(tunnel_msg); 2308 u32 msg_count = msg_msgcnt(tunnel_msg);
2309 u32 bearer_id = msg_bearer_id(tunnel_msg);
2309 2310
2310 dest_link = (*l_ptr)->owner->links[msg_bearer_id(tunnel_msg)]; 2311 if (bearer_id >= MAX_BEARERS)
2312 goto exit;
2313 dest_link = (*l_ptr)->owner->links[bearer_id];
2311 if (!dest_link) 2314 if (!dest_link)
2312 goto exit; 2315 goto exit;
2313 if (dest_link == *l_ptr) { 2316 if (dest_link == *l_ptr) {
@@ -2521,14 +2524,16 @@ int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb,
2521 struct tipc_msg *imsg = (struct tipc_msg *)msg_data(fragm); 2524 struct tipc_msg *imsg = (struct tipc_msg *)msg_data(fragm);
2522 u32 msg_sz = msg_size(imsg); 2525 u32 msg_sz = msg_size(imsg);
2523 u32 fragm_sz = msg_data_sz(fragm); 2526 u32 fragm_sz = msg_data_sz(fragm);
2524 u32 exp_fragm_cnt = msg_sz/fragm_sz + !!(msg_sz % fragm_sz); 2527 u32 exp_fragm_cnt;
2525 u32 max = TIPC_MAX_USER_MSG_SIZE + NAMED_H_SIZE; 2528 u32 max = TIPC_MAX_USER_MSG_SIZE + NAMED_H_SIZE;
2529
2526 if (msg_type(imsg) == TIPC_MCAST_MSG) 2530 if (msg_type(imsg) == TIPC_MCAST_MSG)
2527 max = TIPC_MAX_USER_MSG_SIZE + MCAST_H_SIZE; 2531 max = TIPC_MAX_USER_MSG_SIZE + MCAST_H_SIZE;
2528 if (msg_size(imsg) > max) { 2532 if (fragm_sz == 0 || msg_size(imsg) > max) {
2529 kfree_skb(fbuf); 2533 kfree_skb(fbuf);
2530 return 0; 2534 return 0;
2531 } 2535 }
2536 exp_fragm_cnt = msg_sz / fragm_sz + !!(msg_sz % fragm_sz);
2532 pbuf = tipc_buf_acquire(msg_size(imsg)); 2537 pbuf = tipc_buf_acquire(msg_size(imsg));
2533 if (pbuf != NULL) { 2538 if (pbuf != NULL) {
2534 pbuf->next = *pending; 2539 pbuf->next = *pending;
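Both link.c hunks harden against peer-supplied header fields: the tunnelled bearer id is range-checked before it indexes the links[] array, and the fragment size is rejected when zero before it is used as a divisor for the expected fragment count. A compact sketch of the two guards:

    #include <stdint.h>

    #define MAX_BEARERS 3

    /* Return the expected fragment count, or 0 if the header is nonsense. */
    static uint32_t expected_fragments(uint32_t msg_sz, uint32_t fragm_sz)
    {
        if (fragm_sz == 0)                    /* would otherwise divide by zero */
            return 0;
        return msg_sz / fragm_sz + !!(msg_sz % fragm_sz);
    }

    /* Validate an index read from the wire before using it. */
    static int valid_bearer_id(uint32_t bearer_id)
    {
        return bearer_id < MAX_BEARERS;
    }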
diff --git a/net/wireless/lib80211_crypt_ccmp.c b/net/wireless/lib80211_crypt_ccmp.c
index 1526c211db66..dc0e59e53dbf 100644
--- a/net/wireless/lib80211_crypt_ccmp.c
+++ b/net/wireless/lib80211_crypt_ccmp.c
@@ -430,24 +430,23 @@ static int lib80211_ccmp_get_key(void *key, int len, u8 * seq, void *priv)
430 return CCMP_TK_LEN; 430 return CCMP_TK_LEN;
431} 431}
432 432
433static char *lib80211_ccmp_print_stats(char *p, void *priv) 433static void lib80211_ccmp_print_stats(struct seq_file *m, void *priv)
434{ 434{
435 struct lib80211_ccmp_data *ccmp = priv; 435 struct lib80211_ccmp_data *ccmp = priv;
436 436
437 p += sprintf(p, "key[%d] alg=CCMP key_set=%d " 437 seq_printf(m,
438 "tx_pn=%02x%02x%02x%02x%02x%02x " 438 "key[%d] alg=CCMP key_set=%d "
439 "rx_pn=%02x%02x%02x%02x%02x%02x " 439 "tx_pn=%02x%02x%02x%02x%02x%02x "
440 "format_errors=%d replays=%d decrypt_errors=%d\n", 440 "rx_pn=%02x%02x%02x%02x%02x%02x "
441 ccmp->key_idx, ccmp->key_set, 441 "format_errors=%d replays=%d decrypt_errors=%d\n",
442 ccmp->tx_pn[0], ccmp->tx_pn[1], ccmp->tx_pn[2], 442 ccmp->key_idx, ccmp->key_set,
443 ccmp->tx_pn[3], ccmp->tx_pn[4], ccmp->tx_pn[5], 443 ccmp->tx_pn[0], ccmp->tx_pn[1], ccmp->tx_pn[2],
444 ccmp->rx_pn[0], ccmp->rx_pn[1], ccmp->rx_pn[2], 444 ccmp->tx_pn[3], ccmp->tx_pn[4], ccmp->tx_pn[5],
445 ccmp->rx_pn[3], ccmp->rx_pn[4], ccmp->rx_pn[5], 445 ccmp->rx_pn[0], ccmp->rx_pn[1], ccmp->rx_pn[2],
446 ccmp->dot11RSNAStatsCCMPFormatErrors, 446 ccmp->rx_pn[3], ccmp->rx_pn[4], ccmp->rx_pn[5],
447 ccmp->dot11RSNAStatsCCMPReplays, 447 ccmp->dot11RSNAStatsCCMPFormatErrors,
448 ccmp->dot11RSNAStatsCCMPDecryptErrors); 448 ccmp->dot11RSNAStatsCCMPReplays,
449 449 ccmp->dot11RSNAStatsCCMPDecryptErrors);
450 return p;
451} 450}
452 451
453static struct lib80211_crypto_ops lib80211_crypt_ccmp = { 452static struct lib80211_crypto_ops lib80211_crypt_ccmp = {
diff --git a/net/wireless/lib80211_crypt_tkip.c b/net/wireless/lib80211_crypt_tkip.c
index d475cfc8568f..8c90ba79e56e 100644
--- a/net/wireless/lib80211_crypt_tkip.c
+++ b/net/wireless/lib80211_crypt_tkip.c
@@ -703,30 +703,30 @@ static int lib80211_tkip_get_key(void *key, int len, u8 * seq, void *priv)
703 return TKIP_KEY_LEN; 703 return TKIP_KEY_LEN;
704} 704}
705 705
706static char *lib80211_tkip_print_stats(char *p, void *priv) 706static void lib80211_tkip_print_stats(struct seq_file *m, void *priv)
707{ 707{
708 struct lib80211_tkip_data *tkip = priv; 708 struct lib80211_tkip_data *tkip = priv;
709 p += sprintf(p, "key[%d] alg=TKIP key_set=%d " 709 seq_printf(m,
710 "tx_pn=%02x%02x%02x%02x%02x%02x " 710 "key[%d] alg=TKIP key_set=%d "
711 "rx_pn=%02x%02x%02x%02x%02x%02x " 711 "tx_pn=%02x%02x%02x%02x%02x%02x "
712 "replays=%d icv_errors=%d local_mic_failures=%d\n", 712 "rx_pn=%02x%02x%02x%02x%02x%02x "
713 tkip->key_idx, tkip->key_set, 713 "replays=%d icv_errors=%d local_mic_failures=%d\n",
714 (tkip->tx_iv32 >> 24) & 0xff, 714 tkip->key_idx, tkip->key_set,
715 (tkip->tx_iv32 >> 16) & 0xff, 715 (tkip->tx_iv32 >> 24) & 0xff,
716 (tkip->tx_iv32 >> 8) & 0xff, 716 (tkip->tx_iv32 >> 16) & 0xff,
717 tkip->tx_iv32 & 0xff, 717 (tkip->tx_iv32 >> 8) & 0xff,
718 (tkip->tx_iv16 >> 8) & 0xff, 718 tkip->tx_iv32 & 0xff,
719 tkip->tx_iv16 & 0xff, 719 (tkip->tx_iv16 >> 8) & 0xff,
720 (tkip->rx_iv32 >> 24) & 0xff, 720 tkip->tx_iv16 & 0xff,
721 (tkip->rx_iv32 >> 16) & 0xff, 721 (tkip->rx_iv32 >> 24) & 0xff,
722 (tkip->rx_iv32 >> 8) & 0xff, 722 (tkip->rx_iv32 >> 16) & 0xff,
723 tkip->rx_iv32 & 0xff, 723 (tkip->rx_iv32 >> 8) & 0xff,
724 (tkip->rx_iv16 >> 8) & 0xff, 724 tkip->rx_iv32 & 0xff,
725 tkip->rx_iv16 & 0xff, 725 (tkip->rx_iv16 >> 8) & 0xff,
726 tkip->dot11RSNAStatsTKIPReplays, 726 tkip->rx_iv16 & 0xff,
727 tkip->dot11RSNAStatsTKIPICVErrors, 727 tkip->dot11RSNAStatsTKIPReplays,
728 tkip->dot11RSNAStatsTKIPLocalMICFailures); 728 tkip->dot11RSNAStatsTKIPICVErrors,
729 return p; 729 tkip->dot11RSNAStatsTKIPLocalMICFailures);
730} 730}
731 731
732static struct lib80211_crypto_ops lib80211_crypt_tkip = { 732static struct lib80211_crypto_ops lib80211_crypt_tkip = {
diff --git a/net/wireless/lib80211_crypt_wep.c b/net/wireless/lib80211_crypt_wep.c
index c1304018fc1c..1c292e4ea7b6 100644
--- a/net/wireless/lib80211_crypt_wep.c
+++ b/net/wireless/lib80211_crypt_wep.c
@@ -253,11 +253,10 @@ static int lib80211_wep_get_key(void *key, int len, u8 * seq, void *priv)
253 return wep->key_len; 253 return wep->key_len;
254} 254}
255 255
256static char *lib80211_wep_print_stats(char *p, void *priv) 256static void lib80211_wep_print_stats(struct seq_file *m, void *priv)
257{ 257{
258 struct lib80211_wep_data *wep = priv; 258 struct lib80211_wep_data *wep = priv;
259 p += sprintf(p, "key[%d] alg=WEP len=%d\n", wep->key_idx, wep->key_len); 259 seq_printf(m, "key[%d] alg=WEP len=%d\n", wep->key_idx, wep->key_len);
260 return p;
261} 260}
262 261
263static struct lib80211_crypto_ops lib80211_crypt_wep = { 262static struct lib80211_crypto_ops lib80211_crypt_wep = {
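All three lib80211 print_stats hooks change shape the same way: instead of appending into a caller-supplied buffer with sprintf() and returning the advanced pointer, they print into a seq_file. The refactor in miniature, with stdio's FILE * standing in for struct seq_file *:

    #include <stdio.h>

    struct wep_stats { int key_idx; int key_len; };

    /* Old shape: append into a buffer, return the new end pointer. */
    static char *print_stats_buf(char *p, const struct wep_stats *s)
    {
        p += sprintf(p, "key[%d] alg=WEP len=%d\n", s->key_idx, s->key_len);
        return p;
    }

    /* New shape: write to a stream; no pointer arithmetic, no overflow risk. */
    static void print_stats_stream(FILE *m, const struct wep_stats *s)
    {
        fprintf(m, "key[%d] alg=WEP len=%d\n", s->key_idx, s->key_len);
    }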
diff --git a/net/x25/x25_proc.c b/net/x25/x25_proc.c
index 2ffde4631ae2..0917f047f2cf 100644
--- a/net/x25/x25_proc.c
+++ b/net/x25/x25_proc.c
@@ -187,7 +187,6 @@ static int x25_seq_forward_open(struct inode *inode, struct file *file)
187} 187}
188 188
189static const struct file_operations x25_seq_socket_fops = { 189static const struct file_operations x25_seq_socket_fops = {
190 .owner = THIS_MODULE,
191 .open = x25_seq_socket_open, 190 .open = x25_seq_socket_open,
192 .read = seq_read, 191 .read = seq_read,
193 .llseek = seq_lseek, 192 .llseek = seq_lseek,
@@ -195,7 +194,6 @@ static const struct file_operations x25_seq_socket_fops = {
195}; 194};
196 195
197static const struct file_operations x25_seq_route_fops = { 196static const struct file_operations x25_seq_route_fops = {
198 .owner = THIS_MODULE,
199 .open = x25_seq_route_open, 197 .open = x25_seq_route_open,
200 .read = seq_read, 198 .read = seq_read,
201 .llseek = seq_lseek, 199 .llseek = seq_lseek,
@@ -203,55 +201,38 @@ static const struct file_operations x25_seq_route_fops = {
203}; 201};
204 202
205static const struct file_operations x25_seq_forward_fops = { 203static const struct file_operations x25_seq_forward_fops = {
206 .owner = THIS_MODULE,
207 .open = x25_seq_forward_open, 204 .open = x25_seq_forward_open,
208 .read = seq_read, 205 .read = seq_read,
209 .llseek = seq_lseek, 206 .llseek = seq_lseek,
210 .release = seq_release, 207 .release = seq_release,
211}; 208};
212 209
213static struct proc_dir_entry *x25_proc_dir;
214
215int __init x25_proc_init(void) 210int __init x25_proc_init(void)
216{ 211{
217 struct proc_dir_entry *p; 212 if (!proc_mkdir("x25", init_net.proc_net))
218 int rc = -ENOMEM; 213 return -ENOMEM;
219 214
220 x25_proc_dir = proc_mkdir("x25", init_net.proc_net); 215 if (!proc_create("x25/route", S_IRUGO, init_net.proc_net,
221 if (!x25_proc_dir) 216 &x25_seq_route_fops))
222 goto out; 217 goto out;
223 218
224 p = proc_create("route", S_IRUGO, x25_proc_dir, &x25_seq_route_fops); 219 if (!proc_create("x25/socket", S_IRUGO, init_net.proc_net,
225 if (!p) 220 &x25_seq_socket_fops))
226 goto out_route; 221 goto out;
227
228 p = proc_create("socket", S_IRUGO, x25_proc_dir, &x25_seq_socket_fops);
229 if (!p)
230 goto out_socket;
231 222
232 p = proc_create("forward", S_IRUGO, x25_proc_dir, 223 if (!proc_create("x25/forward", S_IRUGO, init_net.proc_net,
233 &x25_seq_forward_fops); 224 &x25_seq_forward_fops))
234 if (!p) 225 goto out;
235 goto out_forward; 226 return 0;
236 rc = 0;
237 227
238out: 228out:
239 return rc; 229 remove_proc_subtree("x25", init_net.proc_net);
240out_forward: 230 return -ENOMEM;
241 remove_proc_entry("socket", x25_proc_dir);
242out_socket:
243 remove_proc_entry("route", x25_proc_dir);
244out_route:
245 remove_proc_entry("x25", init_net.proc_net);
246 goto out;
247} 231}
248 232
249void __exit x25_proc_exit(void) 233void __exit x25_proc_exit(void)
250{ 234{
251 remove_proc_entry("forward", x25_proc_dir); 235 remove_proc_subtree("x25", init_net.proc_net);
252 remove_proc_entry("route", x25_proc_dir);
253 remove_proc_entry("socket", x25_proc_dir);
254 remove_proc_entry("x25", init_net.proc_net);
255} 236}
256 237
257#else /* CONFIG_PROC_FS */ 238#else /* CONFIG_PROC_FS */
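x25_proc_init() no longer tracks its own proc_dir_entry: the entries are created by path directly under init_net.proc_net, and both the error path and x25_proc_exit() tear the tree down with one remove_proc_subtree() call instead of four remove_proc_entry() calls in reverse order. The before/after teardown, condensed (kernel-style sketch using the names from the hunk above):

    /* Before: each entry removed individually, children before the directory. */
    static void x25_teardown_old(struct proc_dir_entry *x25_proc_dir)
    {
        remove_proc_entry("forward", x25_proc_dir);
        remove_proc_entry("route", x25_proc_dir);
        remove_proc_entry("socket", x25_proc_dir);
        remove_proc_entry("x25", init_net.proc_net);
    }

    /* After: one call removes the directory and everything beneath it. */
    static void x25_teardown_new(void)
    {
        remove_proc_subtree("x25", init_net.proc_net);
    }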
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 6fb9d00a75dc..ab4ef72f0b1d 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -311,6 +311,19 @@ static struct xfrm_algo_desc aalg_list[] = {
311 .sadb_alg_maxbits = 128 311 .sadb_alg_maxbits = 128
312 } 312 }
313}, 313},
314{
315 /* rfc4494 */
316 .name = "cmac(aes)",
317
318 .uinfo = {
319 .auth = {
320 .icv_truncbits = 96,
321 .icv_fullbits = 128,
322 }
323 },
324
325 .pfkey_supported = 0,
326},
314}; 327};
315 328
316static struct xfrm_algo_desc ealg_list[] = { 329static struct xfrm_algo_desc ealg_list[] = {
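The new aalg_list entry registers AES-CMAC (RFC 4494) as an authentication algorithm with a 128-bit MAC truncated to a 96-bit ICV on the wire; pfkey_supported = 0 makes it reachable only through the netlink xfrm interface, not PF_KEY. A tiny sketch of how such a descriptor table is typically searched by name (illustrative only, not the xfrm lookup code):

    #include <stddef.h>
    #include <string.h>

    struct algo_desc { const char *name; int icv_truncbits; int icv_fullbits; };

    static const struct algo_desc auth_algos[] = {
        { "hmac(sha1)", 96, 160 },
        { "cmac(aes)",  96, 128 },    /* the RFC 4494 entry added above */
    };

    static const struct algo_desc *find_auth_algo(const char *name)
    {
        for (size_t i = 0; i < sizeof(auth_algos) / sizeof(auth_algos[0]); i++)
            if (strcmp(auth_algos[i].name, name) == 0)
                return &auth_algos[i];
        return NULL;
    }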