Diffstat (limited to 'net')
-rw-r--r--  net/8021q/vlan_dev.c                   |    2
-rw-r--r--  net/9p/trans_virtio.c                  |   48
-rw-r--r--  net/batman-adv/distributed-arp-table.c |   13
-rw-r--r--  net/batman-adv/main.c                  |   18
-rw-r--r--  net/batman-adv/network-coding.c        |    8
-rw-r--r--  net/bridge/br_stp_timer.c              |    2
-rw-r--r--  net/ceph/Makefile                      |    2
-rw-r--r--  net/ceph/auth.c                        |  117
-rw-r--r--  net/ceph/auth_x.c                      |   24
-rw-r--r--  net/ceph/auth_x.h                      |    1
-rw-r--r--  net/ceph/ceph_common.c                 |    7
-rw-r--r--  net/ceph/debugfs.c                     |    4
-rw-r--r--  net/ceph/messenger.c                   | 1019
-rw-r--r--  net/ceph/mon_client.c                  |    7
-rw-r--r--  net/ceph/osd_client.c                  | 1090
-rw-r--r--  net/ceph/osdmap.c                      |   45
-rw-r--r--  net/ceph/snapshot.c                    |   78
-rw-r--r--  net/core/dev.c                         |   13
-rw-r--r--  net/core/ethtool.c                     |    2
-rw-r--r--  net/core/net-sysfs.c                   |   12
-rw-r--r--  net/core/netpoll.c                     |    2
-rw-r--r--  net/core/sock.c                        |   12
-rw-r--r--  net/ipv4/af_inet.c                     |    1
-rw-r--r--  net/ipv4/fib_trie.c                    |   13
-rw-r--r--  net/ipv4/gre.c                         |   10
-rw-r--r--  net/ipv4/inet_fragment.c               |    1
-rw-r--r--  net/ipv4/ip_output.c                   |    2
-rw-r--r--  net/ipv4/tcp_metrics.c                 |   15
-rw-r--r--  net/ipv4/udp.c                         |    5
-rw-r--r--  net/ipv6/ip6_gre.c                     |    2
-rw-r--r--  net/ipv6/tcp_ipv6.c                    |   12
-rw-r--r--  net/ipv6/udp.c                         |   13
-rw-r--r--  net/ipv6/udp_impl.h                    |    2
-rw-r--r--  net/ipv6/udplite.c                     |    2
-rw-r--r--  net/ipv6/xfrm6_policy.c                |    4
-rw-r--r--  net/packet/af_packet.c                 |   53
-rw-r--r--  net/socket.c                           |    6
-rw-r--r--  net/sunrpc/auth_gss/Makefile           |    3
-rw-r--r--  net/sunrpc/auth_gss/auth_gss.c         |    5
-rw-r--r--  net/sunrpc/auth_gss/gss_krb5_mech.c    |    7
-rw-r--r--  net/sunrpc/auth_gss/gss_mech_switch.c  |    7
-rw-r--r--  net/sunrpc/auth_gss/gss_rpc_upcall.c   |  358
-rw-r--r--  net/sunrpc/auth_gss/gss_rpc_upcall.h   |   48
-rw-r--r--  net/sunrpc/auth_gss/gss_rpc_xdr.c      |  840
-rw-r--r--  net/sunrpc/auth_gss/gss_rpc_xdr.h      |  264
-rw-r--r--  net/sunrpc/auth_gss/svcauth_gss.c      |  363
-rw-r--r--  net/sunrpc/cache.c                     |    4
-rw-r--r--  net/sunrpc/clnt.c                      |    5
-rw-r--r--  net/sunrpc/netns.h                     |    6
-rw-r--r--  net/sunrpc/xprt.c                      |    2
-rw-r--r--  net/sunrpc/xprtsock.c                  |    3
-rw-r--r--  net/tipc/bcast.c                       |   40
-rw-r--r--  net/tipc/link.c                        |   11
-rw-r--r--  net/xfrm/xfrm_algo.c                   |   13
54 files changed, 3776 insertions(+), 870 deletions(-)
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 8af508536d36..3a8c8fd63c88 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -628,7 +628,7 @@ static netdev_features_t vlan_dev_fix_features(struct net_device *dev,
 					     netdev_features_t features)
 {
 	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
-	u32 old_features = features;
+	netdev_features_t old_features = features;
 
 	features &= real_dev->vlan_features;
 	features |= NETIF_F_RXCSUM;
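
Why the one-line type fix above matters: netdev_features_t is a 64-bit bitmask, so snapshotting it into a u32 silently drops any feature flags above bit 31 before the old/new comparison. A minimal userspace sketch of the truncation hazard (the typedef matches the kernel's definition; the bit position is illustrative, not taken from this patch):

	#include <stdint.h>
	#include <stdio.h>

	typedef uint64_t netdev_features_t;	/* the kernel defines this as a 64-bit type */

	int main(void)
	{
		/* hypothetical feature flag living above bit 31 */
		netdev_features_t features = (netdev_features_t)1 << 40;
		uint32_t old_features = (uint32_t)features;	/* truncates: high bits lost */

		printf("full=%016llx truncated=%08x\n",
		       (unsigned long long)features, old_features);
		return 0;
	}
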
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index de2e950a0a7a..e1c26b101830 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -194,11 +194,14 @@ static int pack_sg_list(struct scatterlist *sg, int start,
 		if (s > count)
 			s = count;
 		BUG_ON(index > limit);
+		/* Make sure we don't terminate early. */
+		sg_unmark_end(&sg[index]);
 		sg_set_buf(&sg[index++], data, s);
 		count -= s;
 		data += s;
 	}
-
+	if (index-start)
+		sg_mark_end(&sg[index - 1]);
 	return index-start;
 }
 
@@ -236,12 +239,17 @@ pack_sg_list_p(struct scatterlist *sg, int start, int limit,
 		s = rest_of_page(data);
 		if (s > count)
 			s = count;
+		/* Make sure we don't terminate early. */
+		sg_unmark_end(&sg[index]);
 		sg_set_page(&sg[index++], pdata[i++], s, data_off);
 		data_off = 0;
 		data += s;
 		count -= s;
 		nr_pages--;
 	}
+
+	if (index-start)
+		sg_mark_end(&sg[index - 1]);
 	return index - start;
 }
 
@@ -256,9 +264,10 @@ static int
 p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
 {
 	int err;
-	int in, out;
+	int in, out, out_sgs, in_sgs;
 	unsigned long flags;
 	struct virtio_chan *chan = client->trans;
+	struct scatterlist *sgs[2];
 
 	p9_debug(P9_DEBUG_TRANS, "9p debug: virtio request\n");
 
@@ -266,14 +275,19 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
 req_retry:
 	spin_lock_irqsave(&chan->lock, flags);
 
+	out_sgs = in_sgs = 0;
 	/* Handle out VirtIO ring buffers */
 	out = pack_sg_list(chan->sg, 0,
 			   VIRTQUEUE_NUM, req->tc->sdata, req->tc->size);
+	if (out)
+		sgs[out_sgs++] = chan->sg;
 
 	in = pack_sg_list(chan->sg, out,
 			  VIRTQUEUE_NUM, req->rc->sdata, req->rc->capacity);
+	if (in)
+		sgs[out_sgs + in_sgs++] = chan->sg + out;
 
-	err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc,
+	err = virtqueue_add_sgs(chan->vq, sgs, out_sgs, in_sgs, req->tc,
 				GFP_ATOMIC);
 	if (err < 0) {
 		if (err == -ENOSPC) {
@@ -289,7 +303,7 @@ req_retry:
 		} else {
 			spin_unlock_irqrestore(&chan->lock, flags);
 			p9_debug(P9_DEBUG_TRANS,
-				 "virtio rpc add_buf returned failure\n");
+				 "virtio rpc add_sgs returned failure\n");
 			return -EIO;
 		}
 	}
@@ -351,11 +365,12 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
 		     char *uidata, char *uodata, int inlen,
 		     int outlen, int in_hdr_len, int kern_buf)
 {
-	int in, out, err;
+	int in, out, err, out_sgs, in_sgs;
 	unsigned long flags;
 	int in_nr_pages = 0, out_nr_pages = 0;
 	struct page **in_pages = NULL, **out_pages = NULL;
 	struct virtio_chan *chan = client->trans;
+	struct scatterlist *sgs[4];
 
 	p9_debug(P9_DEBUG_TRANS, "virtio request\n");
 
@@ -396,13 +411,22 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
 	req->status = REQ_STATUS_SENT;
 req_retry_pinned:
 	spin_lock_irqsave(&chan->lock, flags);
+
+	out_sgs = in_sgs = 0;
+
 	/* out data */
 	out = pack_sg_list(chan->sg, 0,
 			   VIRTQUEUE_NUM, req->tc->sdata, req->tc->size);
 
-	if (out_pages)
+	if (out)
+		sgs[out_sgs++] = chan->sg;
+
+	if (out_pages) {
+		sgs[out_sgs++] = chan->sg + out;
 		out += pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM,
 				      out_pages, out_nr_pages, uodata, outlen);
+	}
+
 	/*
 	 * Take care of in data
 	 * For example TREAD have 11.
@@ -412,11 +436,17 @@ req_retry_pinned:
 	 */
 	in = pack_sg_list(chan->sg, out,
 			  VIRTQUEUE_NUM, req->rc->sdata, in_hdr_len);
-	if (in_pages)
+	if (in)
+		sgs[out_sgs + in_sgs++] = chan->sg + out;
+
+	if (in_pages) {
+		sgs[out_sgs + in_sgs++] = chan->sg + out + in;
 		in += pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM,
 				     in_pages, in_nr_pages, uidata, inlen);
+	}
 
-	err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc,
+	BUG_ON(out_sgs + in_sgs > ARRAY_SIZE(sgs));
+	err = virtqueue_add_sgs(chan->vq, sgs, out_sgs, in_sgs, req->tc,
 				GFP_ATOMIC);
 	if (err < 0) {
 		if (err == -ENOSPC) {
@@ -432,7 +462,7 @@ req_retry_pinned:
 		} else {
 			spin_unlock_irqrestore(&chan->lock, flags);
 			p9_debug(P9_DEBUG_TRANS,
-				 "virtio rpc add_buf returned failure\n");
+				 "virtio rpc add_sgs returned failure\n");
 			err = -EIO;
 			goto err_out;
 		}
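
Aside on the new calling convention above: virtqueue_add_buf() took one flat scatterlist plus out/in counts, while virtqueue_add_sgs() takes an array of scatterlist heads, each list individually terminated (which pack_sg_list() and pack_sg_list_p() now guarantee via sg_mark_end()). A minimal sketch of the shape, assuming the 3.10-era virtio API; this is an illustration, not part of the patch:

	#include <linux/scatterlist.h>
	#include <linux/virtio.h>
	#include <linux/gfp.h>

	/* Hypothetical helper: queue one request whose driver-readable part
	 * occupies sg[0..out-1] and whose device-writable part starts at
	 * sg[out]; both sublists must already be sg_mark_end()-terminated. */
	static int queue_req_sketch(struct virtqueue *vq, struct scatterlist *sg,
				    int out, int in, void *token)
	{
		struct scatterlist *sgs[2];
		unsigned int out_sgs = 0, in_sgs = 0;

		if (out)
			sgs[out_sgs++] = sg;			/* driver -> device */
		if (in)
			sgs[out_sgs + in_sgs++] = sg + out;	/* device -> driver */

		return virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, token,
					 GFP_ATOMIC);
	}
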
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 8e15d966d9b0..239992021b1d 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -837,6 +837,19 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
 
 	dat_entry = batadv_dat_entry_hash_find(bat_priv, ip_dst);
 	if (dat_entry) {
+		/* If the ARP request is destined for a local client the local
+		 * client will answer itself. DAT would only generate a
+		 * duplicate packet.
+		 *
+		 * Moreover, if the soft-interface is enslaved into a bridge, an
+		 * additional DAT answer may trigger kernel warnings about
+		 * a packet coming from the wrong port.
+		 */
+		if (batadv_is_my_client(bat_priv, dat_entry->mac_addr)) {
+			ret = true;
+			goto out;
+		}
+
 		skb_new = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_src,
 				     bat_priv->soft_iface, ip_dst, hw_src,
 				     dat_entry->mac_addr, hw_src);
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 3e30a0f1b908..1240f07ad31d 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -163,14 +163,22 @@ void batadv_mesh_free(struct net_device *soft_iface)
 	batadv_vis_quit(bat_priv);
 
 	batadv_gw_node_purge(bat_priv);
-	batadv_originator_free(bat_priv);
 	batadv_nc_free(bat_priv);
+	batadv_dat_free(bat_priv);
+	batadv_bla_free(bat_priv);
 
+	/* Free the TT and the originator tables only after having terminated
+	 * all the other depending components which may use these structures for
+	 * their purposes.
+	 */
 	batadv_tt_free(bat_priv);
 
-	batadv_bla_free(bat_priv);
-
-	batadv_dat_free(bat_priv);
+	/* Since the originator table clean up routine is accessing the TT
+	 * tables as well, it has to be invoked after the TT tables have been
+	 * freed and marked as empty. This ensures that no cleanup RCU callbacks
+	 * accessing the TT data are scheduled for later execution.
+	 */
+	batadv_originator_free(bat_priv);
 
 	free_percpu(bat_priv->bat_counters);
 
@@ -475,7 +483,7 @@ static int batadv_param_set_ra(const char *val, const struct kernel_param *kp)
 	char *algo_name = (char *)val;
 	size_t name_len = strlen(algo_name);
 
-	if (algo_name[name_len - 1] == '\n')
+	if (name_len > 0 && algo_name[name_len - 1] == '\n')
 		algo_name[name_len - 1] = '\0';
 
 	bat_algo_ops = batadv_algo_get(algo_name);
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index f7c54305a918..e84629ece9b7 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -1514,6 +1514,7 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb,
 	struct ethhdr *ethhdr, ethhdr_tmp;
 	uint8_t *orig_dest, ttl, ttvn;
 	unsigned int coding_len;
+	int err;
 
 	/* Save headers temporarily */
 	memcpy(&coded_packet_tmp, skb->data, sizeof(coded_packet_tmp));
@@ -1568,8 +1569,11 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb,
 		   coding_len);
 
 	/* Resize decoded skb if decoded with larger packet */
-	if (nc_packet->skb->len > coding_len + h_size)
-		pskb_trim_rcsum(skb, coding_len + h_size);
+	if (nc_packet->skb->len > coding_len + h_size) {
+		err = pskb_trim_rcsum(skb, coding_len + h_size);
+		if (err)
+			return NULL;
+	}
 
 	/* Create decoded unicast packet */
 	unicast_packet = (struct batadv_unicast_packet *)skb->data;
diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c
index c3530a81a33b..950663d4d330 100644
--- a/net/bridge/br_stp_timer.c
+++ b/net/bridge/br_stp_timer.c
@@ -107,7 +107,7 @@ static void br_tcn_timer_expired(unsigned long arg)
 
 	br_debug(br, "tcn timer expired\n");
 	spin_lock(&br->lock);
-	if (br->dev->flags & IFF_UP) {
+	if (!br_is_root_bridge(br) && (br->dev->flags & IFF_UP)) {
 		br_transmit_tcn(br);
 
 		mod_timer(&br->tcn_timer,jiffies + br->bridge_hello_time);
diff --git a/net/ceph/Makefile b/net/ceph/Makefile
index e87ef435e11b..958d9856912c 100644
--- a/net/ceph/Makefile
+++ b/net/ceph/Makefile
@@ -11,5 +11,5 @@ libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
 	crypto.o armor.o \
 	auth_x.o \
 	ceph_fs.o ceph_strings.o ceph_hash.o \
-	pagevec.o
+	pagevec.o snapshot.o
 
diff --git a/net/ceph/auth.c b/net/ceph/auth.c
index b4bf4ac090f1..6b923bcaa2a4 100644
--- a/net/ceph/auth.c
+++ b/net/ceph/auth.c
@@ -47,6 +47,7 @@ struct ceph_auth_client *ceph_auth_init(const char *name, const struct ceph_cryp
 	if (!ac)
 		goto out;
 
+	mutex_init(&ac->mutex);
 	ac->negotiating = true;
 	if (name)
 		ac->name = name;
@@ -73,10 +74,12 @@ void ceph_auth_destroy(struct ceph_auth_client *ac)
  */
 void ceph_auth_reset(struct ceph_auth_client *ac)
 {
+	mutex_lock(&ac->mutex);
 	dout("auth_reset %p\n", ac);
 	if (ac->ops && !ac->negotiating)
 		ac->ops->reset(ac);
 	ac->negotiating = true;
+	mutex_unlock(&ac->mutex);
 }
 
 int ceph_entity_name_encode(const char *name, void **p, void *end)
@@ -102,6 +105,7 @@ int ceph_auth_build_hello(struct ceph_auth_client *ac, void *buf, size_t len)
 	int i, num;
 	int ret;
 
+	mutex_lock(&ac->mutex);
 	dout("auth_build_hello\n");
 	monhdr->have_version = 0;
 	monhdr->session_mon = cpu_to_le16(-1);
@@ -122,15 +126,19 @@ int ceph_auth_build_hello(struct ceph_auth_client *ac, void *buf, size_t len)
 
 	ret = ceph_entity_name_encode(ac->name, &p, end);
 	if (ret < 0)
-		return ret;
+		goto out;
 	ceph_decode_need(&p, end, sizeof(u64), bad);
 	ceph_encode_64(&p, ac->global_id);
 
 	ceph_encode_32(&lenp, p - lenp - sizeof(u32));
-	return p - buf;
+	ret = p - buf;
+out:
+	mutex_unlock(&ac->mutex);
+	return ret;
 
 bad:
-	return -ERANGE;
+	ret = -ERANGE;
+	goto out;
 }
 
 static int ceph_build_auth_request(struct ceph_auth_client *ac,
@@ -151,11 +159,13 @@ static int ceph_build_auth_request(struct ceph_auth_client *ac,
 	if (ret < 0) {
 		pr_err("error %d building auth method %s request\n", ret,
 		       ac->ops->name);
-		return ret;
+		goto out;
 	}
 	dout(" built request %d bytes\n", ret);
 	ceph_encode_32(&p, ret);
-	return p + ret - msg_buf;
+	ret = p + ret - msg_buf;
+out:
+	return ret;
 }
 
 /*
@@ -176,6 +186,7 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac,
 	int result_msg_len;
 	int ret = -EINVAL;
 
+	mutex_lock(&ac->mutex);
 	dout("handle_auth_reply %p %p\n", p, end);
 	ceph_decode_need(&p, end, sizeof(u32) * 3 + sizeof(u64), bad);
 	protocol = ceph_decode_32(&p);
@@ -227,33 +238,103 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac,
 
 	ret = ac->ops->handle_reply(ac, result, payload, payload_end);
 	if (ret == -EAGAIN) {
-		return ceph_build_auth_request(ac, reply_buf, reply_len);
+		ret = ceph_build_auth_request(ac, reply_buf, reply_len);
 	} else if (ret) {
 		pr_err("auth method '%s' error %d\n", ac->ops->name, ret);
-		return ret;
 	}
-	return 0;
 
-bad:
-	pr_err("failed to decode auth msg\n");
 out:
+	mutex_unlock(&ac->mutex);
 	return ret;
+
+bad:
+	pr_err("failed to decode auth msg\n");
+	ret = -EINVAL;
+	goto out;
 }
 
 int ceph_build_auth(struct ceph_auth_client *ac,
 		    void *msg_buf, size_t msg_len)
 {
+	int ret = 0;
+
+	mutex_lock(&ac->mutex);
 	if (!ac->protocol)
-		return ceph_auth_build_hello(ac, msg_buf, msg_len);
-	BUG_ON(!ac->ops);
-	if (ac->ops->should_authenticate(ac))
-		return ceph_build_auth_request(ac, msg_buf, msg_len);
-	return 0;
+		ret = ceph_auth_build_hello(ac, msg_buf, msg_len);
+	else if (ac->ops->should_authenticate(ac))
+		ret = ceph_build_auth_request(ac, msg_buf, msg_len);
+	mutex_unlock(&ac->mutex);
+	return ret;
 }
 
 int ceph_auth_is_authenticated(struct ceph_auth_client *ac)
 {
-	if (!ac->ops)
-		return 0;
-	return ac->ops->is_authenticated(ac);
+	int ret = 0;
+
+	mutex_lock(&ac->mutex);
+	if (ac->ops)
+		ret = ac->ops->is_authenticated(ac);
+	mutex_unlock(&ac->mutex);
+	return ret;
+}
+EXPORT_SYMBOL(ceph_auth_is_authenticated);
+
+int ceph_auth_create_authorizer(struct ceph_auth_client *ac,
+				int peer_type,
+				struct ceph_auth_handshake *auth)
+{
+	int ret = 0;
+
+	mutex_lock(&ac->mutex);
+	if (ac->ops && ac->ops->create_authorizer)
+		ret = ac->ops->create_authorizer(ac, peer_type, auth);
+	mutex_unlock(&ac->mutex);
+	return ret;
+}
+EXPORT_SYMBOL(ceph_auth_create_authorizer);
+
+void ceph_auth_destroy_authorizer(struct ceph_auth_client *ac,
+				  struct ceph_authorizer *a)
+{
+	mutex_lock(&ac->mutex);
+	if (ac->ops && ac->ops->destroy_authorizer)
+		ac->ops->destroy_authorizer(ac, a);
+	mutex_unlock(&ac->mutex);
+}
+EXPORT_SYMBOL(ceph_auth_destroy_authorizer);
+
+int ceph_auth_update_authorizer(struct ceph_auth_client *ac,
+				int peer_type,
+				struct ceph_auth_handshake *a)
+{
+	int ret = 0;
+
+	mutex_lock(&ac->mutex);
+	if (ac->ops && ac->ops->update_authorizer)
+		ret = ac->ops->update_authorizer(ac, peer_type, a);
+	mutex_unlock(&ac->mutex);
+	return ret;
+}
+EXPORT_SYMBOL(ceph_auth_update_authorizer);
+
+int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
+				      struct ceph_authorizer *a, size_t len)
+{
+	int ret = 0;
+
+	mutex_lock(&ac->mutex);
+	if (ac->ops && ac->ops->verify_authorizer_reply)
+		ret = ac->ops->verify_authorizer_reply(ac, a, len);
+	mutex_unlock(&ac->mutex);
+	return ret;
+}
+EXPORT_SYMBOL(ceph_auth_verify_authorizer_reply);
+
+void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac, int peer_type)
+{
+	mutex_lock(&ac->mutex);
+	if (ac->ops && ac->ops->invalidate_authorizer)
+		ac->ops->invalidate_authorizer(ac, peer_type);
+	mutex_unlock(&ac->mutex);
 }
+EXPORT_SYMBOL(ceph_auth_invalidate_authorizer);
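
Every wrapper added above follows the same locking shape — take ac->mutex, check that the op exists, delegate, unlock — so the ops table and handshake state cannot change mid-call. A condensed sketch of that shape (the helper name is hypothetical; the patch deliberately open-codes one wrapper per operation):

	#include <linux/mutex.h>
	#include <linux/ceph/auth.h>

	/* Hypothetical condensation of the wrapper pattern used above. */
	static int ceph_auth_call_locked(struct ceph_auth_client *ac,
					 int (*op)(struct ceph_auth_client *))
	{
		int ret = 0;

		mutex_lock(&ac->mutex);
		if (ac->ops && op)
			ret = op(ac);
		mutex_unlock(&ac->mutex);
		return ret;
	}
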
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index a16bf14eb027..96238ba95f2b 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -298,6 +298,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
 		return -ENOMEM;
 	}
 	au->service = th->service;
+	au->secret_id = th->secret_id;
 
 	msg_a = au->buf->vec.iov_base;
 	msg_a->struct_v = 1;
@@ -555,6 +556,26 @@ static int ceph_x_create_authorizer(
 	return 0;
 }
 
+static int ceph_x_update_authorizer(
+	struct ceph_auth_client *ac, int peer_type,
+	struct ceph_auth_handshake *auth)
+{
+	struct ceph_x_authorizer *au;
+	struct ceph_x_ticket_handler *th;
+
+	th = get_ticket_handler(ac, peer_type);
+	if (IS_ERR(th))
+		return PTR_ERR(th);
+
+	au = (struct ceph_x_authorizer *)auth->authorizer;
+	if (au->secret_id < th->secret_id) {
+		dout("ceph_x_update_authorizer service %u secret %llu < %llu\n",
+		     au->service, au->secret_id, th->secret_id);
+		return ceph_x_build_authorizer(ac, th, au);
+	}
+	return 0;
+}
+
 static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
 					  struct ceph_authorizer *a, size_t len)
 {
@@ -630,7 +651,7 @@ static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac,
 
 	th = get_ticket_handler(ac, peer_type);
 	if (!IS_ERR(th))
-		remove_ticket_handler(ac, th);
+		memset(&th->validity, 0, sizeof(th->validity));
 }
 
 
@@ -641,6 +662,7 @@ static const struct ceph_auth_client_ops ceph_x_ops = {
 	.build_request = ceph_x_build_request,
 	.handle_reply = ceph_x_handle_reply,
 	.create_authorizer = ceph_x_create_authorizer,
+	.update_authorizer = ceph_x_update_authorizer,
 	.verify_authorizer_reply = ceph_x_verify_authorizer_reply,
 	.destroy_authorizer = ceph_x_destroy_authorizer,
 	.invalidate_authorizer = ceph_x_invalidate_authorizer,
diff --git a/net/ceph/auth_x.h b/net/ceph/auth_x.h
index f459e93b774f..c5a058da7ac8 100644
--- a/net/ceph/auth_x.h
+++ b/net/ceph/auth_x.h
@@ -29,6 +29,7 @@ struct ceph_x_authorizer {
 	struct ceph_buffer *buf;
 	unsigned int service;
 	u64 nonce;
+	u64 secret_id;
 	char reply_buf[128];  /* big enough for encrypted blob */
 };
 
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index e65e6e4be38b..34b11ee8124e 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -606,11 +606,17 @@ static int __init init_ceph_lib(void)
 	if (ret < 0)
 		goto out_crypto;
 
+	ret = ceph_osdc_setup();
+	if (ret < 0)
+		goto out_msgr;
+
 	pr_info("loaded (mon/osd proto %d/%d)\n",
 		CEPH_MONC_PROTOCOL, CEPH_OSDC_PROTOCOL);
 
 	return 0;
 
+out_msgr:
+	ceph_msgr_exit();
 out_crypto:
 	ceph_crypto_shutdown();
 out_debugfs:
@@ -622,6 +628,7 @@ out:
 static void __exit exit_ceph_lib(void)
 {
 	dout("exit_ceph_lib\n");
+	ceph_osdc_cleanup();
 	ceph_msgr_exit();
 	ceph_crypto_shutdown();
 	ceph_debugfs_cleanup();
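
The init/exit changes above extend the usual unwind ladder: each setup step gets a matching label so a mid-sequence failure tears down exactly what already succeeded, in reverse order, and module exit mirrors the full ladder. A self-contained sketch of the pattern with stubbed steps (all names here are hypothetical stand-ins, not the real libceph functions):

	#include <stdio.h>

	/* Stubs standing in for ceph_msgr_init(), ceph_osdc_setup(), ... */
	static int  setup_msgr(void)    { return 0; }
	static void teardown_msgr(void) { }
	static int  setup_osdc(void)    { return -1; /* simulate failure */ }

	static int init_sketch(void)
	{
		int ret;

		ret = setup_msgr();
		if (ret < 0)
			goto out;

		ret = setup_osdc();
		if (ret < 0)
			goto out_msgr;	/* unwind only what succeeded */

		return 0;

	out_msgr:
		teardown_msgr();
	out:
		return ret;
	}

	int main(void)
	{
		printf("init_sketch() = %d\n", init_sketch());
		return 0;
	}
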
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 00d051f4894e..83661cdc0766 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -123,8 +123,8 @@ static int osdc_show(struct seq_file *s, void *pp)
 	mutex_lock(&osdc->request_mutex);
 	for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
 		struct ceph_osd_request *req;
+		unsigned int i;
 		int opcode;
-		int i;
 
 		req = rb_entry(p, struct ceph_osd_request, r_node);
 
@@ -142,7 +142,7 @@ static int osdc_show(struct seq_file *s, void *pp)
 			seq_printf(s, "\t");
 
 		for (i = 0; i < req->r_num_ops; i++) {
-			opcode = le16_to_cpu(req->r_request_ops[i].op);
+			opcode = req->r_ops[i].op;
 			seq_printf(s, "\t%s", ceph_osd_op_name(opcode));
 		}
 
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 2c0669fb54e3..eb0a46a49bd4 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -21,6 +21,9 @@
 #include <linux/ceph/pagelist.h>
 #include <linux/export.h>
 
+#define list_entry_next(pos, member)					\
+	list_entry(pos->member.next, typeof(*pos), member)
+
 /*
  * Ceph uses the messenger to exchange ceph_msg messages with other
  * hosts in the system.  The messenger provides ordered and reliable
@@ -149,6 +152,11 @@ static bool con_flag_test_and_set(struct ceph_connection *con,
 	return test_and_set_bit(con_flag, &con->flags);
 }
 
+/* Slab caches for frequently-allocated structures */
+
+static struct kmem_cache *ceph_msg_cache;
+static struct kmem_cache *ceph_msg_data_cache;
+
 /* static tag bytes (protocol control messages) */
 static char tag_msg = CEPH_MSGR_TAG_MSG;
 static char tag_ack = CEPH_MSGR_TAG_ACK;
@@ -223,6 +231,41 @@ static void encode_my_addr(struct ceph_messenger *msgr)
  */
 static struct workqueue_struct *ceph_msgr_wq;
 
+static int ceph_msgr_slab_init(void)
+{
+	BUG_ON(ceph_msg_cache);
+	ceph_msg_cache = kmem_cache_create("ceph_msg",
+					sizeof (struct ceph_msg),
+					__alignof__(struct ceph_msg), 0, NULL);
+
+	if (!ceph_msg_cache)
+		return -ENOMEM;
+
+	BUG_ON(ceph_msg_data_cache);
+	ceph_msg_data_cache = kmem_cache_create("ceph_msg_data",
+					sizeof (struct ceph_msg_data),
+					__alignof__(struct ceph_msg_data),
+					0, NULL);
+	if (ceph_msg_data_cache)
+		return 0;
+
+	kmem_cache_destroy(ceph_msg_cache);
+	ceph_msg_cache = NULL;
+
+	return -ENOMEM;
+}
+
+static void ceph_msgr_slab_exit(void)
+{
+	BUG_ON(!ceph_msg_data_cache);
+	kmem_cache_destroy(ceph_msg_data_cache);
+	ceph_msg_data_cache = NULL;
+
+	BUG_ON(!ceph_msg_cache);
+	kmem_cache_destroy(ceph_msg_cache);
+	ceph_msg_cache = NULL;
+}
+
 static void _ceph_msgr_exit(void)
 {
 	if (ceph_msgr_wq) {
@@ -230,6 +273,8 @@ static void _ceph_msgr_exit(void)
 		ceph_msgr_wq = NULL;
 	}
 
+	ceph_msgr_slab_exit();
+
 	BUG_ON(zero_page == NULL);
 	kunmap(zero_page);
 	page_cache_release(zero_page);
@@ -242,6 +287,9 @@ int ceph_msgr_init(void)
 	zero_page = ZERO_PAGE(0);
 	page_cache_get(zero_page);
 
+	if (ceph_msgr_slab_init())
+		return -ENOMEM;
+
 	ceph_msgr_wq = alloc_workqueue("ceph-msgr", WQ_NON_REENTRANT, 0);
 	if (ceph_msgr_wq)
 		return 0;
@@ -471,6 +519,22 @@ static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len)
 	return r;
 }
 
+static int ceph_tcp_recvpage(struct socket *sock, struct page *page,
+		     int page_offset, size_t length)
+{
+	void *kaddr;
+	int ret;
+
+	BUG_ON(page_offset + length > PAGE_SIZE);
+
+	kaddr = kmap(page);
+	BUG_ON(!kaddr);
+	ret = ceph_tcp_recvmsg(sock, kaddr + page_offset, length);
+	kunmap(page);
+
+	return ret;
+}
+
 /*
  * write something.  @more is true if caller will be sending more data
  * shortly.
@@ -493,7 +557,7 @@ static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov,
 }
 
 static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
-		     int offset, size_t size, int more)
+		     int offset, size_t size, bool more)
 {
 	int flags = MSG_DONTWAIT | MSG_NOSIGNAL | (more ? MSG_MORE : MSG_EOR);
 	int ret;
@@ -697,50 +761,397 @@ static void con_out_kvec_add(struct ceph_connection *con,
 }
 
 #ifdef CONFIG_BLOCK
-static void init_bio_iter(struct bio *bio, struct bio **iter, int *seg)
+
+/*
+ * For a bio data item, a piece is whatever remains of the next
+ * entry in the current bio iovec, or the first entry in the next
+ * bio in the list.
+ */
+static void ceph_msg_data_bio_cursor_init(struct ceph_msg_data_cursor *cursor,
+					size_t length)
 {
-	if (!bio) {
-		*iter = NULL;
-		*seg = 0;
-		return;
+	struct ceph_msg_data *data = cursor->data;
+	struct bio *bio;
+
+	BUG_ON(data->type != CEPH_MSG_DATA_BIO);
+
+	bio = data->bio;
+	BUG_ON(!bio);
+	BUG_ON(!bio->bi_vcnt);
+
+	cursor->resid = min(length, data->bio_length);
+	cursor->bio = bio;
+	cursor->vector_index = 0;
+	cursor->vector_offset = 0;
+	cursor->last_piece = length <= bio->bi_io_vec[0].bv_len;
+}
+
+static struct page *ceph_msg_data_bio_next(struct ceph_msg_data_cursor *cursor,
+						size_t *page_offset,
+						size_t *length)
+{
+	struct ceph_msg_data *data = cursor->data;
+	struct bio *bio;
+	struct bio_vec *bio_vec;
+	unsigned int index;
+
+	BUG_ON(data->type != CEPH_MSG_DATA_BIO);
+
+	bio = cursor->bio;
+	BUG_ON(!bio);
+
+	index = cursor->vector_index;
+	BUG_ON(index >= (unsigned int) bio->bi_vcnt);
+
+	bio_vec = &bio->bi_io_vec[index];
+	BUG_ON(cursor->vector_offset >= bio_vec->bv_len);
+	*page_offset = (size_t) (bio_vec->bv_offset + cursor->vector_offset);
+	BUG_ON(*page_offset >= PAGE_SIZE);
+	if (cursor->last_piece) /* pagelist offset is always 0 */
+		*length = cursor->resid;
+	else
+		*length = (size_t) (bio_vec->bv_len - cursor->vector_offset);
+	BUG_ON(*length > cursor->resid);
+	BUG_ON(*page_offset + *length > PAGE_SIZE);
+
+	return bio_vec->bv_page;
+}
+
+static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor,
+					size_t bytes)
+{
+	struct bio *bio;
+	struct bio_vec *bio_vec;
+	unsigned int index;
+
+	BUG_ON(cursor->data->type != CEPH_MSG_DATA_BIO);
+
+	bio = cursor->bio;
+	BUG_ON(!bio);
+
+	index = cursor->vector_index;
+	BUG_ON(index >= (unsigned int) bio->bi_vcnt);
+	bio_vec = &bio->bi_io_vec[index];
+
+	/* Advance the cursor offset */
+
+	BUG_ON(cursor->resid < bytes);
+	cursor->resid -= bytes;
+	cursor->vector_offset += bytes;
+	if (cursor->vector_offset < bio_vec->bv_len)
+		return false;	/* more bytes to process in this segment */
+	BUG_ON(cursor->vector_offset != bio_vec->bv_len);
+
+	/* Move on to the next segment, and possibly the next bio */
+
+	if (++index == (unsigned int) bio->bi_vcnt) {
+		bio = bio->bi_next;
+		index = 0;
 	}
-	*iter = bio;
-	*seg = bio->bi_idx;
+	cursor->bio = bio;
+	cursor->vector_index = index;
+	cursor->vector_offset = 0;
+
+	if (!cursor->last_piece) {
+		BUG_ON(!cursor->resid);
+		BUG_ON(!bio);
+		/* A short read is OK, so use <= rather than == */
+		if (cursor->resid <= bio->bi_io_vec[index].bv_len)
+			cursor->last_piece = true;
+	}
+
+	return true;
 }
+#endif /* CONFIG_BLOCK */
 
-static void iter_bio_next(struct bio **bio_iter, int *seg)
+/*
+ * For a page array, a piece comes from the first page in the array
+ * that has not already been fully consumed.
+ */
+static void ceph_msg_data_pages_cursor_init(struct ceph_msg_data_cursor *cursor,
+					size_t length)
 {
-	if (*bio_iter == NULL)
-		return;
+	struct ceph_msg_data *data = cursor->data;
+	int page_count;
+
+	BUG_ON(data->type != CEPH_MSG_DATA_PAGES);
 
-	BUG_ON(*seg >= (*bio_iter)->bi_vcnt);
+	BUG_ON(!data->pages);
+	BUG_ON(!data->length);
 
-	(*seg)++;
-	if (*seg == (*bio_iter)->bi_vcnt)
-		init_bio_iter((*bio_iter)->bi_next, bio_iter, seg);
+	cursor->resid = min(length, data->length);
+	page_count = calc_pages_for(data->alignment, (u64)data->length);
+	cursor->page_offset = data->alignment & ~PAGE_MASK;
+	cursor->page_index = 0;
+	BUG_ON(page_count > (int)USHRT_MAX);
+	cursor->page_count = (unsigned short)page_count;
+	BUG_ON(length > SIZE_MAX - cursor->page_offset);
+	cursor->last_piece = (size_t)cursor->page_offset + length <= PAGE_SIZE;
 }
-#endif
 
-static void prepare_write_message_data(struct ceph_connection *con)
+static struct page *
+ceph_msg_data_pages_next(struct ceph_msg_data_cursor *cursor,
+					size_t *page_offset, size_t *length)
 {
-	struct ceph_msg *msg = con->out_msg;
+	struct ceph_msg_data *data = cursor->data;
 
-	BUG_ON(!msg);
-	BUG_ON(!msg->hdr.data_len);
+	BUG_ON(data->type != CEPH_MSG_DATA_PAGES);
+
+	BUG_ON(cursor->page_index >= cursor->page_count);
+	BUG_ON(cursor->page_offset >= PAGE_SIZE);
+
+	*page_offset = cursor->page_offset;
+	if (cursor->last_piece)
+		*length = cursor->resid;
+	else
+		*length = PAGE_SIZE - *page_offset;
+
+	return data->pages[cursor->page_index];
+}
+
+static bool ceph_msg_data_pages_advance(struct ceph_msg_data_cursor *cursor,
+					size_t bytes)
+{
+	BUG_ON(cursor->data->type != CEPH_MSG_DATA_PAGES);
+
+	BUG_ON(cursor->page_offset + bytes > PAGE_SIZE);
+
+	/* Advance the cursor page offset */
+
+	cursor->resid -= bytes;
+	cursor->page_offset = (cursor->page_offset + bytes) & ~PAGE_MASK;
+	if (!bytes || cursor->page_offset)
+		return false;	/* more bytes to process in the current page */
+
+	/* Move on to the next page; offset is already at 0 */
+
+	BUG_ON(cursor->page_index >= cursor->page_count);
+	cursor->page_index++;
+	cursor->last_piece = cursor->resid <= PAGE_SIZE;
+
+	return true;
+}
+
+/*
+ * For a pagelist, a piece is whatever remains to be consumed in the
+ * first page in the list, or the front of the next page.
+ */
+static void
+ceph_msg_data_pagelist_cursor_init(struct ceph_msg_data_cursor *cursor,
+					size_t length)
+{
+	struct ceph_msg_data *data = cursor->data;
+	struct ceph_pagelist *pagelist;
+	struct page *page;
+
+	BUG_ON(data->type != CEPH_MSG_DATA_PAGELIST);
+
+	pagelist = data->pagelist;
+	BUG_ON(!pagelist);
+
+	if (!length)
+		return;		/* pagelist can be assigned but empty */
+
+	BUG_ON(list_empty(&pagelist->head));
+	page = list_first_entry(&pagelist->head, struct page, lru);
+
+	cursor->resid = min(length, pagelist->length);
+	cursor->page = page;
+	cursor->offset = 0;
+	cursor->last_piece = cursor->resid <= PAGE_SIZE;
+}
+
+static struct page *
+ceph_msg_data_pagelist_next(struct ceph_msg_data_cursor *cursor,
+				size_t *page_offset, size_t *length)
+{
+	struct ceph_msg_data *data = cursor->data;
+	struct ceph_pagelist *pagelist;
+
+	BUG_ON(data->type != CEPH_MSG_DATA_PAGELIST);
 
-	/* initialize page iterator */
-	con->out_msg_pos.page = 0;
-	if (msg->pages)
-		con->out_msg_pos.page_pos = msg->page_alignment;
+	pagelist = data->pagelist;
+	BUG_ON(!pagelist);
+
+	BUG_ON(!cursor->page);
+	BUG_ON(cursor->offset + cursor->resid != pagelist->length);
+
+	/* offset of first page in pagelist is always 0 */
+	*page_offset = cursor->offset & ~PAGE_MASK;
+	if (cursor->last_piece)
+		*length = cursor->resid;
 	else
-		con->out_msg_pos.page_pos = 0;
+		*length = PAGE_SIZE - *page_offset;
+
+	return cursor->page;
+}
+
+static bool ceph_msg_data_pagelist_advance(struct ceph_msg_data_cursor *cursor,
+					size_t bytes)
+{
+	struct ceph_msg_data *data = cursor->data;
+	struct ceph_pagelist *pagelist;
+
+	BUG_ON(data->type != CEPH_MSG_DATA_PAGELIST);
+
+	pagelist = data->pagelist;
+	BUG_ON(!pagelist);
+
+	BUG_ON(cursor->offset + cursor->resid != pagelist->length);
+	BUG_ON((cursor->offset & ~PAGE_MASK) + bytes > PAGE_SIZE);
+
+	/* Advance the cursor offset */
+
+	cursor->resid -= bytes;
+	cursor->offset += bytes;
+	/* offset of first page in pagelist is always 0 */
+	if (!bytes || cursor->offset & ~PAGE_MASK)
+		return false;	/* more bytes to process in the current page */
+
+	/* Move on to the next page */
+
+	BUG_ON(list_is_last(&cursor->page->lru, &pagelist->head));
+	cursor->page = list_entry_next(cursor->page, lru);
+	cursor->last_piece = cursor->resid <= PAGE_SIZE;
+
+	return true;
+}
+
+/*
+ * Message data is handled (sent or received) in pieces, where each
+ * piece resides on a single page.  The network layer might not
+ * consume an entire piece at once.  A data item's cursor keeps
+ * track of which piece is next to process and how much remains to
+ * be processed in that piece.  It also tracks whether the current
+ * piece is the last one in the data item.
+ */
+static void __ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor)
+{
+	size_t length = cursor->total_resid;
+
+	switch (cursor->data->type) {
+	case CEPH_MSG_DATA_PAGELIST:
+		ceph_msg_data_pagelist_cursor_init(cursor, length);
+		break;
+	case CEPH_MSG_DATA_PAGES:
+		ceph_msg_data_pages_cursor_init(cursor, length);
+		break;
 #ifdef CONFIG_BLOCK
-	if (msg->bio)
-		init_bio_iter(msg->bio, &msg->bio_iter, &msg->bio_seg);
-#endif
-	con->out_msg_pos.data_pos = 0;
-	con->out_msg_pos.did_page_crc = false;
-	con->out_more = 1;  /* data + footer will follow */
+	case CEPH_MSG_DATA_BIO:
+		ceph_msg_data_bio_cursor_init(cursor, length);
+		break;
+#endif /* CONFIG_BLOCK */
+	case CEPH_MSG_DATA_NONE:
+	default:
+		/* BUG(); */
+		break;
+	}
+	cursor->need_crc = true;
+}
+
+static void ceph_msg_data_cursor_init(struct ceph_msg *msg, size_t length)
+{
+	struct ceph_msg_data_cursor *cursor = &msg->cursor;
+	struct ceph_msg_data *data;
+
+	BUG_ON(!length);
+	BUG_ON(length > msg->data_length);
+	BUG_ON(list_empty(&msg->data));
+
+	cursor->data_head = &msg->data;
+	cursor->total_resid = length;
+	data = list_first_entry(&msg->data, struct ceph_msg_data, links);
+	cursor->data = data;
+
+	__ceph_msg_data_cursor_init(cursor);
+}
+
+/*
+ * Return the page containing the next piece to process for a given
+ * data item, and supply the page offset and length of that piece.
+ * Indicate whether this is the last piece in this data item.
+ */
+static struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor,
+					size_t *page_offset, size_t *length,
+					bool *last_piece)
+{
+	struct page *page;
+
+	switch (cursor->data->type) {
+	case CEPH_MSG_DATA_PAGELIST:
+		page = ceph_msg_data_pagelist_next(cursor, page_offset, length);
+		break;
+	case CEPH_MSG_DATA_PAGES:
+		page = ceph_msg_data_pages_next(cursor, page_offset, length);
+		break;
+#ifdef CONFIG_BLOCK
+	case CEPH_MSG_DATA_BIO:
+		page = ceph_msg_data_bio_next(cursor, page_offset, length);
+		break;
+#endif /* CONFIG_BLOCK */
+	case CEPH_MSG_DATA_NONE:
+	default:
+		page = NULL;
+		break;
+	}
+	BUG_ON(!page);
+	BUG_ON(*page_offset + *length > PAGE_SIZE);
+	BUG_ON(!*length);
+	if (last_piece)
+		*last_piece = cursor->last_piece;
+
+	return page;
+}
+
+/*
+ * Returns true if the result moves the cursor on to the next piece
+ * of the data item.
+ */
+static bool ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor,
+				size_t bytes)
+{
+	bool new_piece;
+
+	BUG_ON(bytes > cursor->resid);
+	switch (cursor->data->type) {
+	case CEPH_MSG_DATA_PAGELIST:
+		new_piece = ceph_msg_data_pagelist_advance(cursor, bytes);
+		break;
+	case CEPH_MSG_DATA_PAGES:
+		new_piece = ceph_msg_data_pages_advance(cursor, bytes);
+		break;
+#ifdef CONFIG_BLOCK
+	case CEPH_MSG_DATA_BIO:
+		new_piece = ceph_msg_data_bio_advance(cursor, bytes);
+		break;
+#endif /* CONFIG_BLOCK */
+	case CEPH_MSG_DATA_NONE:
+	default:
+		BUG();
+		break;
+	}
+	cursor->total_resid -= bytes;
+
+	if (!cursor->resid && cursor->total_resid) {
+		WARN_ON(!cursor->last_piece);
+		BUG_ON(list_is_last(&cursor->data->links, cursor->data_head));
+		cursor->data = list_entry_next(cursor->data, links);
+		__ceph_msg_data_cursor_init(cursor);
+		new_piece = true;
+	}
+	cursor->need_crc = new_piece;
+
+	return new_piece;
+}
+
+static void prepare_message_data(struct ceph_msg *msg, u32 data_len)
+{
+	BUG_ON(!msg);
+	BUG_ON(!data_len);
+
+	/* Initialize data cursor */
+
+	ceph_msg_data_cursor_init(msg, (size_t)data_len);
 }
 
 /*
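
The payoff of the cursor machinery in the hunk above is that senders and receivers no longer special-case page arrays, pagelists, and bios: every data item becomes the same next/advance loop. A sketch of the send side, modeled on write_partial_message_data() later in this patch, with CRC handling elided for clarity (an illustration, not patch code):

	static int send_data_sketch(struct ceph_connection *con, struct ceph_msg *msg)
	{
		struct ceph_msg_data_cursor *cursor = &msg->cursor;

		while (cursor->resid) {
			struct page *page;
			size_t page_offset;
			size_t length;
			bool last_piece;
			int ret;

			/* which page, where in it, and how much remains */
			page = ceph_msg_data_next(cursor, &page_offset, &length,
						  &last_piece);
			ret = ceph_tcp_sendpage(con->sock, page, page_offset,
						length, last_piece);
			if (ret <= 0)
				return ret;	/* socket full (0) or error (<0) */

			/* advances across pages and data items transparently */
			ceph_msg_data_advance(cursor, (size_t)ret);
		}
		return 1;
	}
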
@@ -803,16 +1214,12 @@ static void prepare_write_message(struct ceph_connection *con)
 		m->hdr.seq = cpu_to_le64(++con->out_seq);
 		m->needs_out_seq = false;
 	}
-#ifdef CONFIG_BLOCK
-	else
-		m->bio_iter = NULL;
-#endif
+	WARN_ON(m->data_length != le32_to_cpu(m->hdr.data_len));
 
-	dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n",
+	dout("prepare_write_message %p seq %lld type %d len %d+%d+%zd\n",
 	     m, con->out_seq, le16_to_cpu(m->hdr.type),
 	     le32_to_cpu(m->hdr.front_len), le32_to_cpu(m->hdr.middle_len),
-	     le32_to_cpu(m->hdr.data_len),
-	     m->nr_pages);
+	     m->data_length);
 	BUG_ON(le32_to_cpu(m->hdr.front_len) != m->front.iov_len);
 
 	/* tag + hdr + front + middle */
@@ -843,11 +1250,13 @@ static void prepare_write_message(struct ceph_connection *con)
 
 	/* is there a data payload? */
 	con->out_msg->footer.data_crc = 0;
-	if (m->hdr.data_len)
-		prepare_write_message_data(con);
-	else
+	if (m->data_length) {
+		prepare_message_data(con->out_msg, m->data_length);
+		con->out_more = 1;	/* data + footer will follow */
+	} else {
 		/* no, queue up footer too and be done */
 		prepare_write_message_footer(con);
+	}
 
 	con_flag_set(con, CON_FLAG_WRITE_PENDING);
 }
@@ -874,6 +1283,24 @@ static void prepare_write_ack(struct ceph_connection *con)
 }
 
 /*
+ * Prepare to share the seq during handshake
+ */
+static void prepare_write_seq(struct ceph_connection *con)
+{
+	dout("prepare_write_seq %p %llu -> %llu\n", con,
+	     con->in_seq_acked, con->in_seq);
+	con->in_seq_acked = con->in_seq;
+
+	con_out_kvec_reset(con);
+
+	con->out_temp_ack = cpu_to_le64(con->in_seq_acked);
+	con_out_kvec_add(con, sizeof (con->out_temp_ack),
+			 &con->out_temp_ack);
+
+	con_flag_set(con, CON_FLAG_WRITE_PENDING);
+}
+
+/*
  * Prepare to write keepalive byte.
  */
 static void prepare_write_keepalive(struct ceph_connection *con)
@@ -1022,35 +1449,19 @@ out:
 	return ret;  /* done! */
 }
 
-static void out_msg_pos_next(struct ceph_connection *con, struct page *page,
-			size_t len, size_t sent, bool in_trail)
+static u32 ceph_crc32c_page(u32 crc, struct page *page,
+				unsigned int page_offset,
+				unsigned int length)
 {
-	struct ceph_msg *msg = con->out_msg;
+	char *kaddr;
 
-	BUG_ON(!msg);
-	BUG_ON(!sent);
-
-	con->out_msg_pos.data_pos += sent;
-	con->out_msg_pos.page_pos += sent;
-	if (sent < len)
-		return;
+	kaddr = kmap(page);
+	BUG_ON(kaddr == NULL);
+	crc = crc32c(crc, kaddr + page_offset, length);
+	kunmap(page);
 
-	BUG_ON(sent != len);
-	con->out_msg_pos.page_pos = 0;
-	con->out_msg_pos.page++;
-	con->out_msg_pos.did_page_crc = false;
-	if (in_trail)
-		list_move_tail(&page->lru,
-			       &msg->trail->head);
-	else if (msg->pagelist)
-		list_move_tail(&page->lru,
-			       &msg->pagelist->head);
-#ifdef CONFIG_BLOCK
-	else if (msg->bio)
-		iter_bio_next(&msg->bio_iter, &msg->bio_seg);
-#endif
+	return crc;
 }
-
 /*
  * Write as much message data payload as we can.  If we finish, queue
  * up the footer.
@@ -1058,21 +1469,17 @@ static void out_msg_pos_next(struct ceph_connection *con, struct page *page,
  *  0 -> socket full, but more to do
  * <0 -> error
  */
-static int write_partial_msg_pages(struct ceph_connection *con)
+static int write_partial_message_data(struct ceph_connection *con)
 {
 	struct ceph_msg *msg = con->out_msg;
-	unsigned int data_len = le32_to_cpu(msg->hdr.data_len);
-	size_t len;
+	struct ceph_msg_data_cursor *cursor = &msg->cursor;
 	bool do_datacrc = !con->msgr->nocrc;
-	int ret;
-	int total_max_write;
-	bool in_trail = false;
-	const size_t trail_len = (msg->trail ? msg->trail->length : 0);
-	const size_t trail_off = data_len - trail_len;
+	u32 crc;
 
-	dout("write_partial_msg_pages %p msg %p page %d/%d offset %d\n",
-	     con, msg, con->out_msg_pos.page, msg->nr_pages,
-	     con->out_msg_pos.page_pos);
+	dout("%s %p msg %p\n", __func__, con, msg);
+
+	if (list_empty(&msg->data))
+		return -EINVAL;
 
 	/*
 	 * Iterate through each page that contains data to be
@@ -1082,72 +1489,41 @@ static int write_partial_msg_pages(struct ceph_connection *con)
 	 * need to map the page.  If we have no pages, they have
 	 * been revoked, so use the zero page.
 	 */
-	while (data_len > con->out_msg_pos.data_pos) {
-		struct page *page = NULL;
-		int max_write = PAGE_SIZE;
-		int bio_offset = 0;
-
-		in_trail = in_trail || con->out_msg_pos.data_pos >= trail_off;
-		if (!in_trail)
-			total_max_write = trail_off - con->out_msg_pos.data_pos;
-
-		if (in_trail) {
-			total_max_write = data_len - con->out_msg_pos.data_pos;
-
-			page = list_first_entry(&msg->trail->head,
-						struct page, lru);
-		} else if (msg->pages) {
-			page = msg->pages[con->out_msg_pos.page];
-		} else if (msg->pagelist) {
-			page = list_first_entry(&msg->pagelist->head,
-						struct page, lru);
-#ifdef CONFIG_BLOCK
-		} else if (msg->bio) {
-			struct bio_vec *bv;
+	crc = do_datacrc ? le32_to_cpu(msg->footer.data_crc) : 0;
+	while (cursor->resid) {
+		struct page *page;
+		size_t page_offset;
+		size_t length;
+		bool last_piece;
+		bool need_crc;
+		int ret;
 
-			bv = bio_iovec_idx(msg->bio_iter, msg->bio_seg);
-			page = bv->bv_page;
-			bio_offset = bv->bv_offset;
-			max_write = bv->bv_len;
-#endif
-		} else {
-			page = zero_page;
-		}
-		len = min_t(int, max_write - con->out_msg_pos.page_pos,
-			    total_max_write);
-
-		if (do_datacrc && !con->out_msg_pos.did_page_crc) {
-			void *base;
-			u32 crc = le32_to_cpu(msg->footer.data_crc);
-			char *kaddr;
-
-			kaddr = kmap(page);
-			BUG_ON(kaddr == NULL);
-			base = kaddr + con->out_msg_pos.page_pos + bio_offset;
-			crc = crc32c(crc, base, len);
-			kunmap(page);
-			msg->footer.data_crc = cpu_to_le32(crc);
-			con->out_msg_pos.did_page_crc = true;
-		}
-		ret = ceph_tcp_sendpage(con->sock, page,
-				      con->out_msg_pos.page_pos + bio_offset,
-				      len, 1);
-		if (ret <= 0)
-			goto out;
+		page = ceph_msg_data_next(&msg->cursor, &page_offset, &length,
+							&last_piece);
+		ret = ceph_tcp_sendpage(con->sock, page, page_offset,
+				      length, last_piece);
+		if (ret <= 0) {
+			if (do_datacrc)
+				msg->footer.data_crc = cpu_to_le32(crc);
 
-		out_msg_pos_next(con, page, len, (size_t) ret, in_trail);
+			return ret;
+		}
+		if (do_datacrc && cursor->need_crc)
+			crc = ceph_crc32c_page(crc, page, page_offset, length);
+		need_crc = ceph_msg_data_advance(&msg->cursor, (size_t)ret);
 	}
 
-	dout("write_partial_msg_pages %p msg %p done\n", con, msg);
+	dout("%s %p msg %p done\n", __func__, con, msg);
 
 	/* prepare and queue up footer, too */
-	if (!do_datacrc)
+	if (do_datacrc)
+		msg->footer.data_crc = cpu_to_le32(crc);
+	else
 		msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC;
 	con_out_kvec_reset(con);
 	prepare_write_message_footer(con);
-	ret = 1;
-out:
-	return ret;
+
+	return 1;	/* must return > 0 to indicate success */
 }
 
 /*
@@ -1160,7 +1536,7 @@ static int write_partial_skip(struct ceph_connection *con)
 	while (con->out_skip > 0) {
 		size_t size = min(con->out_skip, (int) PAGE_CACHE_SIZE);
 
-		ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, 1);
+		ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, true);
 		if (ret <= 0)
 			goto out;
 		con->out_skip -= ret;
@@ -1191,6 +1567,13 @@ static void prepare_read_ack(struct ceph_connection *con)
 	con->in_base_pos = 0;
 }
 
+static void prepare_read_seq(struct ceph_connection *con)
+{
+	dout("prepare_read_seq %p\n", con);
+	con->in_base_pos = 0;
+	con->in_tag = CEPH_MSGR_TAG_SEQ;
+}
+
 static void prepare_read_tag(struct ceph_connection *con)
 {
 	dout("prepare_read_tag %p\n", con);
@@ -1597,7 +1980,6 @@ static int process_connect(struct ceph_connection *con)
 			con->error_msg = "connect authorization failure";
 			return -1;
 		}
-		con->auth_retry = 1;
 		con_out_kvec_reset(con);
 		ret = prepare_write_connect(con);
 		if (ret < 0)
@@ -1668,6 +2050,7 @@ static int process_connect(struct ceph_connection *con)
 		prepare_read_connect(con);
 		break;
 
+	case CEPH_MSGR_TAG_SEQ:
 	case CEPH_MSGR_TAG_READY:
 		if (req_feat & ~server_feat) {
 			pr_err("%s%lld %s protocol feature mismatch,"
@@ -1682,7 +2065,7 @@ static int process_connect(struct ceph_connection *con)
 
 		WARN_ON(con->state != CON_STATE_NEGOTIATING);
 		con->state = CON_STATE_OPEN;
-
+		con->auth_retry = 0;    /* we authenticated; clear flag */
 		con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq);
 		con->connect_seq++;
 		con->peer_features = server_feat;
@@ -1698,7 +2081,12 @@ static int process_connect(struct ceph_connection *con)
 
 		con->delay = 0;      /* reset backoff memory */
 
-		prepare_read_tag(con);
+		if (con->in_reply.tag == CEPH_MSGR_TAG_SEQ) {
+			prepare_write_seq(con);
+			prepare_read_seq(con);
+		} else {
+			prepare_read_tag(con);
+		}
 		break;
 
 	case CEPH_MSGR_TAG_WAIT:
@@ -1732,7 +2120,6 @@ static int read_partial_ack(struct ceph_connection *con)
 	return read_partial(con, end, size, &con->in_temp_ack);
 }
 
-
 /*
  * We can finally discard anything that's been acked.
  */
@@ -1757,8 +2144,6 @@ static void process_ack(struct ceph_connection *con)
 }
 
 
-
-
 static int read_partial_message_section(struct ceph_connection *con,
 					struct kvec *section,
 					unsigned int sec_len, u32 *crc)
@@ -1782,77 +2167,49 @@ static int read_partial_message_section(struct ceph_connection *con,
1782 return 1; 2167 return 1;
1783} 2168}
1784 2169
1785static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip); 2170static int read_partial_msg_data(struct ceph_connection *con)
1786
1787static int read_partial_message_pages(struct ceph_connection *con,
1788 struct page **pages,
1789 unsigned int data_len, bool do_datacrc)
1790{ 2171{
1791 void *p; 2172 struct ceph_msg *msg = con->in_msg;
2173 struct ceph_msg_data_cursor *cursor = &msg->cursor;
2174 const bool do_datacrc = !con->msgr->nocrc;
2175 struct page *page;
2176 size_t page_offset;
2177 size_t length;
2178 u32 crc = 0;
1792 int ret; 2179 int ret;
1793 int left;
1794 2180
1795 left = min((int)(data_len - con->in_msg_pos.data_pos), 2181 BUG_ON(!msg);
1796 (int)(PAGE_SIZE - con->in_msg_pos.page_pos)); 2182 if (list_empty(&msg->data))
1797 /* (page) data */ 2183 return -EIO;
1798 BUG_ON(pages == NULL);
1799 p = kmap(pages[con->in_msg_pos.page]);
1800 ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos,
1801 left);
1802 if (ret > 0 && do_datacrc)
1803 con->in_data_crc =
1804 crc32c(con->in_data_crc,
1805 p + con->in_msg_pos.page_pos, ret);
1806 kunmap(pages[con->in_msg_pos.page]);
1807 if (ret <= 0)
1808 return ret;
1809 con->in_msg_pos.data_pos += ret;
1810 con->in_msg_pos.page_pos += ret;
1811 if (con->in_msg_pos.page_pos == PAGE_SIZE) {
1812 con->in_msg_pos.page_pos = 0;
1813 con->in_msg_pos.page++;
1814 }
1815
1816 return ret;
1817}
1818
1819#ifdef CONFIG_BLOCK
1820static int read_partial_message_bio(struct ceph_connection *con,
1821 struct bio **bio_iter, int *bio_seg,
1822 unsigned int data_len, bool do_datacrc)
1823{
1824 struct bio_vec *bv = bio_iovec_idx(*bio_iter, *bio_seg);
1825 void *p;
1826 int ret, left;
1827 2184
1828 left = min((int)(data_len - con->in_msg_pos.data_pos), 2185 if (do_datacrc)
1829 (int)(bv->bv_len - con->in_msg_pos.page_pos)); 2186 crc = con->in_data_crc;
2187 while (cursor->resid) {
2188 page = ceph_msg_data_next(&msg->cursor, &page_offset, &length,
2189 NULL);
2190 ret = ceph_tcp_recvpage(con->sock, page, page_offset, length);
2191 if (ret <= 0) {
2192 if (do_datacrc)
2193 con->in_data_crc = crc;
1830 2194
1831 p = kmap(bv->bv_page) + bv->bv_offset; 2195 return ret;
2196 }
1832 2197
1833 ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos, 2198 if (do_datacrc)
1834 left); 2199 crc = ceph_crc32c_page(crc, page, page_offset, ret);
1835 if (ret > 0 && do_datacrc) 2200 (void) ceph_msg_data_advance(&msg->cursor, (size_t)ret);
1836 con->in_data_crc =
1837 crc32c(con->in_data_crc,
1838 p + con->in_msg_pos.page_pos, ret);
1839 kunmap(bv->bv_page);
1840 if (ret <= 0)
1841 return ret;
1842 con->in_msg_pos.data_pos += ret;
1843 con->in_msg_pos.page_pos += ret;
1844 if (con->in_msg_pos.page_pos == bv->bv_len) {
1845 con->in_msg_pos.page_pos = 0;
1846 iter_bio_next(bio_iter, bio_seg);
1847 } 2201 }
2202 if (do_datacrc)
2203 con->in_data_crc = crc;
1848 2204
1849 return ret; 2205 return 1; /* must return > 0 to indicate success */
1850} 2206}
1851#endif
1852 2207
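The rewritten read path above replaces the per-type page/bio iterators with a single cursor walk. A minimal sketch of that pattern, using the ceph_msg_data_next()/ceph_msg_data_advance() calls shown above (the consume() callback is a stand-in for the real socket I/O):

static void walk_msg_data(struct ceph_msg *msg,
			  void (*consume)(struct page *page,
					  size_t offset, size_t length))
{
	struct ceph_msg_data_cursor *cursor = &msg->cursor;
	struct page *page;
	size_t page_offset;
	size_t length;

	while (cursor->resid) {
		/* next contiguous piece, regardless of backing type */
		page = ceph_msg_data_next(cursor, &page_offset, &length, NULL);
		consume(page, page_offset, length);
		/* tell the cursor how much was actually used */
		(void) ceph_msg_data_advance(cursor, length);
	}
}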
1853/* 2208/*
1854 * read (part of) a message. 2209 * read (part of) a message.
1855 */ 2210 */
2211static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip);
2212
1856static int read_partial_message(struct ceph_connection *con) 2213static int read_partial_message(struct ceph_connection *con)
1857{ 2214{
1858 struct ceph_msg *m = con->in_msg; 2215 struct ceph_msg *m = con->in_msg;
@@ -1885,7 +2242,7 @@ static int read_partial_message(struct ceph_connection *con)
1885 if (front_len > CEPH_MSG_MAX_FRONT_LEN) 2242 if (front_len > CEPH_MSG_MAX_FRONT_LEN)
1886 return -EIO; 2243 return -EIO;
1887 middle_len = le32_to_cpu(con->in_hdr.middle_len); 2244 middle_len = le32_to_cpu(con->in_hdr.middle_len);
1888 if (middle_len > CEPH_MSG_MAX_DATA_LEN) 2245 if (middle_len > CEPH_MSG_MAX_MIDDLE_LEN)
1889 return -EIO; 2246 return -EIO;
1890 data_len = le32_to_cpu(con->in_hdr.data_len); 2247 data_len = le32_to_cpu(con->in_hdr.data_len);
1891 if (data_len > CEPH_MSG_MAX_DATA_LEN) 2248 if (data_len > CEPH_MSG_MAX_DATA_LEN)
@@ -1914,14 +2271,22 @@ static int read_partial_message(struct ceph_connection *con)
1914 int skip = 0; 2271 int skip = 0;
1915 2272
1916 dout("got hdr type %d front %d data %d\n", con->in_hdr.type, 2273 dout("got hdr type %d front %d data %d\n", con->in_hdr.type,
1917 con->in_hdr.front_len, con->in_hdr.data_len); 2274 front_len, data_len);
1918 ret = ceph_con_in_msg_alloc(con, &skip); 2275 ret = ceph_con_in_msg_alloc(con, &skip);
1919 if (ret < 0) 2276 if (ret < 0)
1920 return ret; 2277 return ret;
2278
2279 BUG_ON(!con->in_msg ^ skip);
2280 if (con->in_msg && data_len > con->in_msg->data_length) {
2281 pr_warning("%s skipping long message (%u > %zd)\n",
2282 __func__, data_len, con->in_msg->data_length);
2283 ceph_msg_put(con->in_msg);
2284 con->in_msg = NULL;
2285 skip = 1;
2286 }
1921 if (skip) { 2287 if (skip) {
1922 /* skip this message */ 2288 /* skip this message */
1923 dout("alloc_msg said skip message\n"); 2289 dout("alloc_msg said skip message\n");
1924 BUG_ON(con->in_msg);
1925 con->in_base_pos = -front_len - middle_len - data_len - 2290 con->in_base_pos = -front_len - middle_len - data_len -
1926 sizeof(m->footer); 2291 sizeof(m->footer);
1927 con->in_tag = CEPH_MSGR_TAG_READY; 2292 con->in_tag = CEPH_MSGR_TAG_READY;
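Concretely, skipping consumes the unwanted message by arithmetic on in_base_pos; a worked example with assumed lengths:

	/*
	 * Assumed header: front 128, middle 0, data 4096. The skip path
	 * sets in_base_pos = -(128 + 0 + 4096 + sizeof(m->footer)), and
	 * the read loop discards exactly that many incoming bytes before
	 * looking for the next tag -- no message buffer is allocated.
	 */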
@@ -1936,17 +2301,10 @@ static int read_partial_message(struct ceph_connection *con)
1936 if (m->middle) 2301 if (m->middle)
1937 m->middle->vec.iov_len = 0; 2302 m->middle->vec.iov_len = 0;
1938 2303
1939 con->in_msg_pos.page = 0; 2304 /* prepare for data payload, if any */
1940 if (m->pages)
1941 con->in_msg_pos.page_pos = m->page_alignment;
1942 else
1943 con->in_msg_pos.page_pos = 0;
1944 con->in_msg_pos.data_pos = 0;
1945 2305
1946#ifdef CONFIG_BLOCK 2306 if (data_len)
1947 if (m->bio) 2307 prepare_message_data(con->in_msg, data_len);
1948 init_bio_iter(m->bio, &m->bio_iter, &m->bio_seg);
1949#endif
1950 } 2308 }
1951 2309
1952 /* front */ 2310 /* front */
@@ -1965,24 +2323,10 @@ static int read_partial_message(struct ceph_connection *con)
1965 } 2323 }
1966 2324
1967 /* (page) data */ 2325 /* (page) data */
1968 while (con->in_msg_pos.data_pos < data_len) { 2326 if (data_len) {
1969 if (m->pages) { 2327 ret = read_partial_msg_data(con);
1970 ret = read_partial_message_pages(con, m->pages, 2328 if (ret <= 0)
1971 data_len, do_datacrc); 2329 return ret;
1972 if (ret <= 0)
1973 return ret;
1974#ifdef CONFIG_BLOCK
1975 } else if (m->bio) {
1976 BUG_ON(!m->bio_iter);
1977 ret = read_partial_message_bio(con,
1978 &m->bio_iter, &m->bio_seg,
1979 data_len, do_datacrc);
1980 if (ret <= 0)
1981 return ret;
1982#endif
1983 } else {
1984 BUG_ON(1);
1985 }
1986 } 2330 }
1987 2331
1988 /* footer */ 2332 /* footer */
@@ -2108,13 +2452,13 @@ more_kvec:
2108 goto do_next; 2452 goto do_next;
2109 } 2453 }
2110 2454
2111 ret = write_partial_msg_pages(con); 2455 ret = write_partial_message_data(con);
2112 if (ret == 1) 2456 if (ret == 1)
2113 goto more_kvec; /* we need to send the footer, too! */ 2457 goto more_kvec; /* we need to send the footer, too! */
2114 if (ret == 0) 2458 if (ret == 0)
2115 goto out; 2459 goto out;
2116 if (ret < 0) { 2460 if (ret < 0) {
2117 dout("try_write write_partial_msg_pages err %d\n", 2461 dout("try_write write_partial_message_data err %d\n",
2118 ret); 2462 ret);
2119 goto out; 2463 goto out;
2120 } 2464 }
@@ -2266,7 +2610,12 @@ more:
2266 prepare_read_tag(con); 2610 prepare_read_tag(con);
2267 goto more; 2611 goto more;
2268 } 2612 }
2269 if (con->in_tag == CEPH_MSGR_TAG_ACK) { 2613 if (con->in_tag == CEPH_MSGR_TAG_ACK ||
2614 con->in_tag == CEPH_MSGR_TAG_SEQ) {
2615 /*
2616 * the final handshake seq exchange is semantically
2617 * equivalent to an ACK
2618 */
2270 ret = read_partial_ack(con); 2619 ret = read_partial_ack(con);
2271 if (ret <= 0) 2620 if (ret <= 0)
2272 goto out; 2621 goto out;
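A worked example of why the SEQ exchange can share the ACK read path (sequence numbers are assumed):

	/*
	 * A client reconnects having sent messages with seqs 1..4. The
	 * server's TAG_SEQ payload carries seq = 3: it already received
	 * 1..3. Feeding that through read_partial_ack()/process_ack()
	 * releases 1..3 from the sent queue, so only message 4 needs to
	 * be retransmitted.
	 */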
@@ -2672,6 +3021,88 @@ void ceph_con_keepalive(struct ceph_connection *con)
2672} 3021}
2673EXPORT_SYMBOL(ceph_con_keepalive); 3022EXPORT_SYMBOL(ceph_con_keepalive);
2674 3023
3024static struct ceph_msg_data *ceph_msg_data_create(enum ceph_msg_data_type type)
3025{
3026 struct ceph_msg_data *data;
3027
3028 if (WARN_ON(!ceph_msg_data_type_valid(type)))
3029 return NULL;
3030
3031 data = kmem_cache_zalloc(ceph_msg_data_cache, GFP_NOFS);
3032 if (data)
3033 data->type = type;
3034 INIT_LIST_HEAD(&data->links);
3035
3036 return data;
3037}
3038
3039static void ceph_msg_data_destroy(struct ceph_msg_data *data)
3040{
3041 if (!data)
3042 return;
3043
3044 WARN_ON(!list_empty(&data->links));
3045 if (data->type == CEPH_MSG_DATA_PAGELIST) {
3046 ceph_pagelist_release(data->pagelist);
3047 kfree(data->pagelist);
3048 }
3049 kmem_cache_free(ceph_msg_data_cache, data);
3050}
3051
3052void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages,
3053 size_t length, size_t alignment)
3054{
3055 struct ceph_msg_data *data;
3056
3057 BUG_ON(!pages);
3058 BUG_ON(!length);
3059
3060 data = ceph_msg_data_create(CEPH_MSG_DATA_PAGES);
3061 BUG_ON(!data);
3062 data->pages = pages;
3063 data->length = length;
3064 data->alignment = alignment & ~PAGE_MASK;
3065
3066 list_add_tail(&data->links, &msg->data);
3067 msg->data_length += length;
3068}
3069EXPORT_SYMBOL(ceph_msg_data_add_pages);
3070
3071void ceph_msg_data_add_pagelist(struct ceph_msg *msg,
3072 struct ceph_pagelist *pagelist)
3073{
3074 struct ceph_msg_data *data;
3075
3076 BUG_ON(!pagelist);
3077 BUG_ON(!pagelist->length);
3078
3079 data = ceph_msg_data_create(CEPH_MSG_DATA_PAGELIST);
3080 BUG_ON(!data);
3081 data->pagelist = pagelist;
3082
3083 list_add_tail(&data->links, &msg->data);
3084 msg->data_length += pagelist->length;
3085}
3086EXPORT_SYMBOL(ceph_msg_data_add_pagelist);
3087
3088#ifdef CONFIG_BLOCK
3089void ceph_msg_data_add_bio(struct ceph_msg *msg, struct bio *bio,
3090 size_t length)
3091{
3092 struct ceph_msg_data *data;
3093
3094 BUG_ON(!bio);
3095
3096 data = ceph_msg_data_create(CEPH_MSG_DATA_BIO);
3097 BUG_ON(!data);
3098 data->bio = bio;
3099 data->bio_length = length;
3100
3101 list_add_tail(&data->links, &msg->data);
3102 msg->data_length += length;
3103}
3104EXPORT_SYMBOL(ceph_msg_data_add_bio);
3105#endif /* CONFIG_BLOCK */
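A usage sketch for the new data-item API (the wrapper and its error handling below are illustrative; the real callers in osd_client.c rely on the BUG_ON()s instead):

static int attach_page_payload(struct ceph_msg *msg, struct page **pages,
			       size_t length)
{
	if (!pages || !length)
		return -EINVAL;	/* the API itself BUG_ON()s these */

	/* append one data item; msg->data_length accumulates per item */
	ceph_msg_data_add_pages(msg, pages, length, 0);

	return 0;
}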
2675 3106
2676/* 3107/*
2677 * construct a new message with given type, size 3108 * construct a new message with given type, size
@@ -2682,49 +3113,20 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
2682{ 3113{
2683 struct ceph_msg *m; 3114 struct ceph_msg *m;
2684 3115
2685 m = kmalloc(sizeof(*m), flags); 3116 m = kmem_cache_zalloc(ceph_msg_cache, flags);
2686 if (m == NULL) 3117 if (m == NULL)
2687 goto out; 3118 goto out;
2688 kref_init(&m->kref);
2689 3119
2690 m->con = NULL;
2691 INIT_LIST_HEAD(&m->list_head);
2692
2693 m->hdr.tid = 0;
2694 m->hdr.type = cpu_to_le16(type); 3120 m->hdr.type = cpu_to_le16(type);
2695 m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT); 3121 m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT);
2696 m->hdr.version = 0;
2697 m->hdr.front_len = cpu_to_le32(front_len); 3122 m->hdr.front_len = cpu_to_le32(front_len);
2698 m->hdr.middle_len = 0;
2699 m->hdr.data_len = 0;
2700 m->hdr.data_off = 0;
2701 m->hdr.reserved = 0;
2702 m->footer.front_crc = 0;
2703 m->footer.middle_crc = 0;
2704 m->footer.data_crc = 0;
2705 m->footer.flags = 0;
2706 m->front_max = front_len;
2707 m->front_is_vmalloc = false;
2708 m->more_to_follow = false;
2709 m->ack_stamp = 0;
2710 m->pool = NULL;
2711
2712 /* middle */
2713 m->middle = NULL;
2714 3123
2715 /* data */ 3124 INIT_LIST_HEAD(&m->list_head);
2716 m->nr_pages = 0; 3125 kref_init(&m->kref);
2717 m->page_alignment = 0; 3126 INIT_LIST_HEAD(&m->data);
2718 m->pages = NULL;
2719 m->pagelist = NULL;
2720#ifdef CONFIG_BLOCK
2721 m->bio = NULL;
2722 m->bio_iter = NULL;
2723 m->bio_seg = 0;
2724#endif /* CONFIG_BLOCK */
2725 m->trail = NULL;
2726 3127
2727 /* front */ 3128 /* front */
3129 m->front_max = front_len;
2728 if (front_len) { 3130 if (front_len) {
2729 if (front_len > PAGE_CACHE_SIZE) { 3131 if (front_len > PAGE_CACHE_SIZE) {
2730 m->front.iov_base = __vmalloc(front_len, flags, 3132 m->front.iov_base = __vmalloc(front_len, flags,
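ceph_msg_new() now relies on kmem_cache_zalloc(), so every field starts zeroed and only the non-zero fields are set explicitly. The ceph_msg_cache itself is created elsewhere in this patch; a sketch of the expected setup, assumed here for illustration:

static struct kmem_cache *ceph_msg_cache;

static int ceph_msgr_slab_init(void)
{
	BUG_ON(ceph_msg_cache);
	/* backing cache for the kmem_cache_zalloc() call above */
	ceph_msg_cache = KMEM_CACHE(ceph_msg, 0);

	return ceph_msg_cache ? 0 : -ENOMEM;
}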
@@ -2802,49 +3204,37 @@ static int ceph_alloc_middle(struct ceph_connection *con, struct ceph_msg *msg)
2802static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip) 3204static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip)
2803{ 3205{
2804 struct ceph_msg_header *hdr = &con->in_hdr; 3206 struct ceph_msg_header *hdr = &con->in_hdr;
2805 int type = le16_to_cpu(hdr->type);
2806 int front_len = le32_to_cpu(hdr->front_len);
2807 int middle_len = le32_to_cpu(hdr->middle_len); 3207 int middle_len = le32_to_cpu(hdr->middle_len);
3208 struct ceph_msg *msg;
2808 int ret = 0; 3209 int ret = 0;
2809 3210
2810 BUG_ON(con->in_msg != NULL); 3211 BUG_ON(con->in_msg != NULL);
3212 BUG_ON(!con->ops->alloc_msg);
2811 3213
2812 if (con->ops->alloc_msg) { 3214 mutex_unlock(&con->mutex);
2813 struct ceph_msg *msg; 3215 msg = con->ops->alloc_msg(con, hdr, skip);
2814 3216 mutex_lock(&con->mutex);
2815 mutex_unlock(&con->mutex); 3217 if (con->state != CON_STATE_OPEN) {
2816 msg = con->ops->alloc_msg(con, hdr, skip); 3218 if (msg)
2817 mutex_lock(&con->mutex); 3219 ceph_msg_put(msg);
2818 if (con->state != CON_STATE_OPEN) { 3220 return -EAGAIN;
2819 if (msg)
2820 ceph_msg_put(msg);
2821 return -EAGAIN;
2822 }
2823 con->in_msg = msg;
2824 if (con->in_msg) {
2825 con->in_msg->con = con->ops->get(con);
2826 BUG_ON(con->in_msg->con == NULL);
2827 }
2828 if (*skip) {
2829 con->in_msg = NULL;
2830 return 0;
2831 }
2832 if (!con->in_msg) {
2833 con->error_msg =
2834 "error allocating memory for incoming message";
2835 return -ENOMEM;
2836 }
2837 } 3221 }
2838 if (!con->in_msg) { 3222 if (msg) {
2839 con->in_msg = ceph_msg_new(type, front_len, GFP_NOFS, false); 3223 BUG_ON(*skip);
2840 if (!con->in_msg) { 3224 con->in_msg = msg;
2841 pr_err("unable to allocate msg type %d len %d\n",
2842 type, front_len);
2843 return -ENOMEM;
2844 }
2845 con->in_msg->con = con->ops->get(con); 3225 con->in_msg->con = con->ops->get(con);
2846 BUG_ON(con->in_msg->con == NULL); 3226 BUG_ON(con->in_msg->con == NULL);
2847 con->in_msg->page_alignment = le16_to_cpu(hdr->data_off); 3227 } else {
3228 /*
3229 * Null message pointer means either we should skip
3230 * this message or we couldn't allocate memory. The
3231 * former is not an error.
3232 */
3233 if (*skip)
3234 return 0;
3235 con->error_msg = "error allocating memory for incoming message";
3236
3237 return -ENOMEM;
2848 } 3238 }
2849 memcpy(&con->in_msg->hdr, &con->in_hdr, sizeof(con->in_hdr)); 3239 memcpy(&con->in_msg->hdr, &con->in_hdr, sizeof(con->in_hdr));
2850 3240
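The rewritten allocator enforces a three-way contract with the connection's alloc_msg method; a minimal sketch of a conforming callback (the message-type check is hypothetical):

static struct ceph_msg *demo_alloc_msg(struct ceph_connection *con,
				       struct ceph_msg_header *hdr,
				       int *skip)
{
	int type = le16_to_cpu(hdr->type);
	int front_len = le32_to_cpu(hdr->front_len);

	if (type != CEPH_MSG_OSD_OPREPLY) {
		*skip = 1;	/* NULL + *skip set: drop, not an error */
		return NULL;
	}
	*skip = 0;
	/* NULL with *skip clear is treated as -ENOMEM by the caller */
	return ceph_msg_new(type, front_len, GFP_NOFS, false);
}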
@@ -2870,7 +3260,7 @@ void ceph_msg_kfree(struct ceph_msg *m)
2870 vfree(m->front.iov_base); 3260 vfree(m->front.iov_base);
2871 else 3261 else
2872 kfree(m->front.iov_base); 3262 kfree(m->front.iov_base);
2873 kfree(m); 3263 kmem_cache_free(ceph_msg_cache, m);
2874} 3264}
2875 3265
2876/* 3266/*
@@ -2879,6 +3269,9 @@ void ceph_msg_kfree(struct ceph_msg *m)
2879void ceph_msg_last_put(struct kref *kref) 3269void ceph_msg_last_put(struct kref *kref)
2880{ 3270{
2881 struct ceph_msg *m = container_of(kref, struct ceph_msg, kref); 3271 struct ceph_msg *m = container_of(kref, struct ceph_msg, kref);
3272 LIST_HEAD(data);
3273 struct list_head *links;
3274 struct list_head *next;
2882 3275
2883 dout("ceph_msg_put last one on %p\n", m); 3276 dout("ceph_msg_put last one on %p\n", m);
2884 WARN_ON(!list_empty(&m->list_head)); 3277 WARN_ON(!list_empty(&m->list_head));
@@ -2888,16 +3281,16 @@ void ceph_msg_last_put(struct kref *kref)
2888 ceph_buffer_put(m->middle); 3281 ceph_buffer_put(m->middle);
2889 m->middle = NULL; 3282 m->middle = NULL;
2890 } 3283 }
2891 m->nr_pages = 0;
2892 m->pages = NULL;
2893 3284
2894 if (m->pagelist) { 3285 list_splice_init(&m->data, &data);
2895 ceph_pagelist_release(m->pagelist); 3286 list_for_each_safe(links, next, &data) {
2896 kfree(m->pagelist); 3287 struct ceph_msg_data *data;
2897 m->pagelist = NULL;
2898 }
2899 3288
2900 m->trail = NULL; 3289 data = list_entry(links, struct ceph_msg_data, links);
3290 list_del_init(links);
3291 ceph_msg_data_destroy(data);
3292 }
3293 m->data_length = 0;
2901 3294
2902 if (m->pool) 3295 if (m->pool)
2903 ceph_msgpool_put(m->pool, m); 3296 ceph_msgpool_put(m->pool, m);
@@ -2908,8 +3301,8 @@ EXPORT_SYMBOL(ceph_msg_last_put);
2908 3301
2909void ceph_msg_dump(struct ceph_msg *msg) 3302void ceph_msg_dump(struct ceph_msg *msg)
2910{ 3303{
2911 pr_debug("msg_dump %p (front_max %d nr_pages %d)\n", msg, 3304 pr_debug("msg_dump %p (front_max %d length %zd)\n", msg,
2912 msg->front_max, msg->nr_pages); 3305 msg->front_max, msg->data_length);
2913 print_hex_dump(KERN_DEBUG, "header: ", 3306 print_hex_dump(KERN_DEBUG, "header: ",
2914 DUMP_PREFIX_OFFSET, 16, 1, 3307 DUMP_PREFIX_OFFSET, 16, 1,
2915 &msg->hdr, sizeof(msg->hdr), true); 3308 &msg->hdr, sizeof(msg->hdr), true);
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index aef5b1062bee..1fe25cd29d0e 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -737,7 +737,7 @@ static void delayed_work(struct work_struct *work)
737 737
738 __validate_auth(monc); 738 __validate_auth(monc);
739 739
740 if (monc->auth->ops->is_authenticated(monc->auth)) 740 if (ceph_auth_is_authenticated(monc->auth))
741 __send_subscribe(monc); 741 __send_subscribe(monc);
742 } 742 }
743 __schedule_delayed(monc); 743 __schedule_delayed(monc);
@@ -892,8 +892,7 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
892 892
893 mutex_lock(&monc->mutex); 893 mutex_lock(&monc->mutex);
894 had_debugfs_info = have_debugfs_info(monc); 894 had_debugfs_info = have_debugfs_info(monc);
895 if (monc->auth->ops) 895 was_auth = ceph_auth_is_authenticated(monc->auth);
896 was_auth = monc->auth->ops->is_authenticated(monc->auth);
897 monc->pending_auth = 0; 896 monc->pending_auth = 0;
898 ret = ceph_handle_auth_reply(monc->auth, msg->front.iov_base, 897 ret = ceph_handle_auth_reply(monc->auth, msg->front.iov_base,
899 msg->front.iov_len, 898 msg->front.iov_len,
@@ -904,7 +903,7 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
904 wake_up_all(&monc->client->auth_wq); 903 wake_up_all(&monc->client->auth_wq);
905 } else if (ret > 0) { 904 } else if (ret > 0) {
906 __send_prepared_auth_request(monc, ret); 905 __send_prepared_auth_request(monc, ret);
907 } else if (!was_auth && monc->auth->ops->is_authenticated(monc->auth)) { 906 } else if (!was_auth && ceph_auth_is_authenticated(monc->auth)) {
908 dout("authenticated, starting session\n"); 907 dout("authenticated, starting session\n");
909 908
910 monc->client->msgr.inst.name.type = CEPH_ENTITY_TYPE_CLIENT; 909 monc->client->msgr.inst.name.type = CEPH_ENTITY_TYPE_CLIENT;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index d730dd4d8eb2..d5953b87918c 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1,3 +1,4 @@
1
1#include <linux/ceph/ceph_debug.h> 2#include <linux/ceph/ceph_debug.h>
2 3
3#include <linux/module.h> 4#include <linux/module.h>
@@ -21,6 +22,8 @@
21#define OSD_OP_FRONT_LEN 4096 22#define OSD_OP_FRONT_LEN 4096
22#define OSD_OPREPLY_FRONT_LEN 512 23#define OSD_OPREPLY_FRONT_LEN 512
23 24
25static struct kmem_cache *ceph_osd_request_cache;
26
24static const struct ceph_connection_operations osd_con_ops; 27static const struct ceph_connection_operations osd_con_ops;
25 28
26static void __send_queued(struct ceph_osd_client *osdc); 29static void __send_queued(struct ceph_osd_client *osdc);
@@ -32,12 +35,6 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc,
32static void __send_request(struct ceph_osd_client *osdc, 35static void __send_request(struct ceph_osd_client *osdc,
33 struct ceph_osd_request *req); 36 struct ceph_osd_request *req);
34 37
35static int op_has_extent(int op)
36{
37 return (op == CEPH_OSD_OP_READ ||
38 op == CEPH_OSD_OP_WRITE);
39}
40
41/* 38/*
42 * Implement client access to distributed object storage cluster. 39 * Implement client access to distributed object storage cluster.
43 * 40 *
@@ -63,53 +60,238 @@ static int op_has_extent(int op)
63 * 60 *
64 * fill osd op in request message. 61 * fill osd op in request message.
65 */ 62 */
66static int calc_layout(struct ceph_vino vino, 63static int calc_layout(struct ceph_file_layout *layout, u64 off, u64 *plen,
67 struct ceph_file_layout *layout, 64 u64 *objnum, u64 *objoff, u64 *objlen)
68 u64 off, u64 *plen,
69 struct ceph_osd_request *req,
70 struct ceph_osd_req_op *op)
71{ 65{
72 u64 orig_len = *plen; 66 u64 orig_len = *plen;
73 u64 bno = 0;
74 u64 objoff = 0;
75 u64 objlen = 0;
76 int r; 67 int r;
77 68
78 /* object extent? */ 69 /* object extent? */
79 r = ceph_calc_file_object_mapping(layout, off, orig_len, &bno, 70 r = ceph_calc_file_object_mapping(layout, off, orig_len, objnum,
80 &objoff, &objlen); 71 objoff, objlen);
81 if (r < 0) 72 if (r < 0)
82 return r; 73 return r;
83 if (objlen < orig_len) { 74 if (*objlen < orig_len) {
84 *plen = objlen; 75 *plen = *objlen;
85 dout(" skipping last %llu, final file extent %llu~%llu\n", 76 dout(" skipping last %llu, final file extent %llu~%llu\n",
86 orig_len - *plen, off, *plen); 77 orig_len - *plen, off, *plen);
87 } 78 }
88 79
89 if (op_has_extent(op->op)) { 80 dout("calc_layout objnum=%llx %llu~%llu\n", *objnum, *objoff, *objlen);
90 u32 osize = le32_to_cpu(layout->fl_object_size); 81
91 op->extent.offset = objoff; 82 return 0;
92 op->extent.length = objlen; 83}
93 if (op->extent.truncate_size <= off - objoff) { 84
94 op->extent.truncate_size = 0; 85static void ceph_osd_data_init(struct ceph_osd_data *osd_data)
95 } else { 86{
96 op->extent.truncate_size -= off - objoff; 87 memset(osd_data, 0, sizeof (*osd_data));
97 if (op->extent.truncate_size > osize) 88 osd_data->type = CEPH_OSD_DATA_TYPE_NONE;
98 op->extent.truncate_size = osize; 89}
99 } 90
91static void ceph_osd_data_pages_init(struct ceph_osd_data *osd_data,
92 struct page **pages, u64 length, u32 alignment,
93 bool pages_from_pool, bool own_pages)
94{
95 osd_data->type = CEPH_OSD_DATA_TYPE_PAGES;
96 osd_data->pages = pages;
97 osd_data->length = length;
98 osd_data->alignment = alignment;
99 osd_data->pages_from_pool = pages_from_pool;
100 osd_data->own_pages = own_pages;
101}
102
103static void ceph_osd_data_pagelist_init(struct ceph_osd_data *osd_data,
104 struct ceph_pagelist *pagelist)
105{
106 osd_data->type = CEPH_OSD_DATA_TYPE_PAGELIST;
107 osd_data->pagelist = pagelist;
108}
109
110#ifdef CONFIG_BLOCK
111static void ceph_osd_data_bio_init(struct ceph_osd_data *osd_data,
112 struct bio *bio, size_t bio_length)
113{
114 osd_data->type = CEPH_OSD_DATA_TYPE_BIO;
115 osd_data->bio = bio;
116 osd_data->bio_length = bio_length;
117}
118#endif /* CONFIG_BLOCK */
119
120#define osd_req_op_data(oreq, whch, typ, fld) \
121 ({ \
122 BUG_ON(whch >= (oreq)->r_num_ops); \
123 &(oreq)->r_ops[whch].typ.fld; \
124 })
125
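osd_req_op_data() is a GCC statement expression; for reference, osd_req_op_data(req, 0, extent, osd_data) expands to roughly:

	({
		BUG_ON(0 >= (req)->r_num_ops);
		&(req)->r_ops[0].extent.osd_data;	/* result of the expression */
	})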
126static struct ceph_osd_data *
127osd_req_op_raw_data_in(struct ceph_osd_request *osd_req, unsigned int which)
128{
129 BUG_ON(which >= osd_req->r_num_ops);
130
131 return &osd_req->r_ops[which].raw_data_in;
132}
133
134struct ceph_osd_data *
135osd_req_op_extent_osd_data(struct ceph_osd_request *osd_req,
136 unsigned int which)
137{
138 return osd_req_op_data(osd_req, which, extent, osd_data);
139}
140EXPORT_SYMBOL(osd_req_op_extent_osd_data);
141
142struct ceph_osd_data *
143osd_req_op_cls_response_data(struct ceph_osd_request *osd_req,
144 unsigned int which)
145{
146 return osd_req_op_data(osd_req, which, cls, response_data);
147}
148EXPORT_SYMBOL(osd_req_op_cls_response_data); /* ??? */
149
150void osd_req_op_raw_data_in_pages(struct ceph_osd_request *osd_req,
151 unsigned int which, struct page **pages,
152 u64 length, u32 alignment,
153 bool pages_from_pool, bool own_pages)
154{
155 struct ceph_osd_data *osd_data;
156
157 osd_data = osd_req_op_raw_data_in(osd_req, which);
158 ceph_osd_data_pages_init(osd_data, pages, length, alignment,
159 pages_from_pool, own_pages);
160}
161EXPORT_SYMBOL(osd_req_op_raw_data_in_pages);
162
163void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *osd_req,
164 unsigned int which, struct page **pages,
165 u64 length, u32 alignment,
166 bool pages_from_pool, bool own_pages)
167{
168 struct ceph_osd_data *osd_data;
169
170 osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
171 ceph_osd_data_pages_init(osd_data, pages, length, alignment,
172 pages_from_pool, own_pages);
173}
174EXPORT_SYMBOL(osd_req_op_extent_osd_data_pages);
175
176void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *osd_req,
177 unsigned int which, struct ceph_pagelist *pagelist)
178{
179 struct ceph_osd_data *osd_data;
180
181 osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
182 ceph_osd_data_pagelist_init(osd_data, pagelist);
183}
184EXPORT_SYMBOL(osd_req_op_extent_osd_data_pagelist);
185
186#ifdef CONFIG_BLOCK
187void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req,
188 unsigned int which, struct bio *bio, size_t bio_length)
189{
190 struct ceph_osd_data *osd_data;
191
192 osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
193 ceph_osd_data_bio_init(osd_data, bio, bio_length);
194}
195EXPORT_SYMBOL(osd_req_op_extent_osd_data_bio);
196#endif /* CONFIG_BLOCK */
197
198static void osd_req_op_cls_request_info_pagelist(
199 struct ceph_osd_request *osd_req,
200 unsigned int which, struct ceph_pagelist *pagelist)
201{
202 struct ceph_osd_data *osd_data;
203
204 osd_data = osd_req_op_data(osd_req, which, cls, request_info);
205 ceph_osd_data_pagelist_init(osd_data, pagelist);
206}
207
208void osd_req_op_cls_request_data_pagelist(
209 struct ceph_osd_request *osd_req,
210 unsigned int which, struct ceph_pagelist *pagelist)
211{
212 struct ceph_osd_data *osd_data;
213
214 osd_data = osd_req_op_data(osd_req, which, cls, request_data);
215 ceph_osd_data_pagelist_init(osd_data, pagelist);
216}
217EXPORT_SYMBOL(osd_req_op_cls_request_data_pagelist);
218
219void osd_req_op_cls_request_data_pages(struct ceph_osd_request *osd_req,
220 unsigned int which, struct page **pages, u64 length,
221 u32 alignment, bool pages_from_pool, bool own_pages)
222{
223 struct ceph_osd_data *osd_data;
224
225 osd_data = osd_req_op_data(osd_req, which, cls, request_data);
226 ceph_osd_data_pages_init(osd_data, pages, length, alignment,
227 pages_from_pool, own_pages);
228}
229EXPORT_SYMBOL(osd_req_op_cls_request_data_pages);
230
231void osd_req_op_cls_response_data_pages(struct ceph_osd_request *osd_req,
232 unsigned int which, struct page **pages, u64 length,
233 u32 alignment, bool pages_from_pool, bool own_pages)
234{
235 struct ceph_osd_data *osd_data;
236
237 osd_data = osd_req_op_data(osd_req, which, cls, response_data);
238 ceph_osd_data_pages_init(osd_data, pages, length, alignment,
239 pages_from_pool, own_pages);
240}
241EXPORT_SYMBOL(osd_req_op_cls_response_data_pages);
242
243static u64 ceph_osd_data_length(struct ceph_osd_data *osd_data)
244{
245 switch (osd_data->type) {
246 case CEPH_OSD_DATA_TYPE_NONE:
247 return 0;
248 case CEPH_OSD_DATA_TYPE_PAGES:
249 return osd_data->length;
250 case CEPH_OSD_DATA_TYPE_PAGELIST:
251 return (u64)osd_data->pagelist->length;
252#ifdef CONFIG_BLOCK
253 case CEPH_OSD_DATA_TYPE_BIO:
254 return (u64)osd_data->bio_length;
255#endif /* CONFIG_BLOCK */
256 default:
257 WARN(true, "unrecognized data type %d\n", (int)osd_data->type);
258 return 0;
100 } 259 }
101 req->r_num_pages = calc_pages_for(off, *plen); 260}
102 req->r_page_alignment = off & ~PAGE_MASK;
103 if (op->op == CEPH_OSD_OP_WRITE)
104 op->payload_len = *plen;
105 261
106 dout("calc_layout bno=%llx %llu~%llu (%d pages)\n", 262static void ceph_osd_data_release(struct ceph_osd_data *osd_data)
107 bno, objoff, objlen, req->r_num_pages); 263{
264 if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES && osd_data->own_pages) {
265 int num_pages;
108 266
109 snprintf(req->r_oid, sizeof(req->r_oid), "%llx.%08llx", vino.ino, bno); 267 num_pages = calc_pages_for((u64)osd_data->alignment,
110 req->r_oid_len = strlen(req->r_oid); 268 (u64)osd_data->length);
269 ceph_release_page_vector(osd_data->pages, num_pages);
270 }
271 ceph_osd_data_init(osd_data);
272}
273
274static void osd_req_op_data_release(struct ceph_osd_request *osd_req,
275 unsigned int which)
276{
277 struct ceph_osd_req_op *op;
278
279 BUG_ON(which >= osd_req->r_num_ops);
280 op = &osd_req->r_ops[which];
111 281
112 return r; 282 switch (op->op) {
283 case CEPH_OSD_OP_READ:
284 case CEPH_OSD_OP_WRITE:
285 ceph_osd_data_release(&op->extent.osd_data);
286 break;
287 case CEPH_OSD_OP_CALL:
288 ceph_osd_data_release(&op->cls.request_info);
289 ceph_osd_data_release(&op->cls.request_data);
290 ceph_osd_data_release(&op->cls.response_data);
291 break;
292 default:
293 break;
294 }
113} 295}
114 296
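With per-op data now embedded in r_ops[], byte counts flow through ceph_osd_data_length(); a small sketch (the helper below is assumed, not part of the patch) of computing the outgoing length for a write op:

static u64 write_request_data_len(struct ceph_osd_request *req,
				  unsigned int which)
{
	struct ceph_osd_data *osd_data;

	osd_data = osd_req_op_extent_osd_data(req, which);
	return ceph_osd_data_length(osd_data);	/* 0 for TYPE_NONE */
}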
115/* 297/*
@@ -117,30 +299,26 @@ static int calc_layout(struct ceph_vino vino,
117 */ 299 */
118void ceph_osdc_release_request(struct kref *kref) 300void ceph_osdc_release_request(struct kref *kref)
119{ 301{
120 struct ceph_osd_request *req = container_of(kref, 302 struct ceph_osd_request *req;
121 struct ceph_osd_request, 303 unsigned int which;
122 r_kref);
123 304
305 req = container_of(kref, struct ceph_osd_request, r_kref);
124 if (req->r_request) 306 if (req->r_request)
125 ceph_msg_put(req->r_request); 307 ceph_msg_put(req->r_request);
126 if (req->r_con_filling_msg) { 308 if (req->r_reply) {
127 dout("%s revoking msg %p from con %p\n", __func__,
128 req->r_reply, req->r_con_filling_msg);
129 ceph_msg_revoke_incoming(req->r_reply); 309 ceph_msg_revoke_incoming(req->r_reply);
130 req->r_con_filling_msg->ops->put(req->r_con_filling_msg);
131 req->r_con_filling_msg = NULL;
132 }
133 if (req->r_reply)
134 ceph_msg_put(req->r_reply); 310 ceph_msg_put(req->r_reply);
135 if (req->r_own_pages) 311 }
136 ceph_release_page_vector(req->r_pages, 312
137 req->r_num_pages); 313 for (which = 0; which < req->r_num_ops; which++)
314 osd_req_op_data_release(req, which);
315
138 ceph_put_snap_context(req->r_snapc); 316 ceph_put_snap_context(req->r_snapc);
139 ceph_pagelist_release(&req->r_trail);
140 if (req->r_mempool) 317 if (req->r_mempool)
141 mempool_free(req, req->r_osdc->req_mempool); 318 mempool_free(req, req->r_osdc->req_mempool);
142 else 319 else
143 kfree(req); 320 kmem_cache_free(ceph_osd_request_cache, req);
321
144} 322}
145EXPORT_SYMBOL(ceph_osdc_release_request); 323EXPORT_SYMBOL(ceph_osdc_release_request);
146 324
@@ -154,6 +332,9 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
154 struct ceph_msg *msg; 332 struct ceph_msg *msg;
155 size_t msg_size; 333 size_t msg_size;
156 334
335 BUILD_BUG_ON(CEPH_OSD_MAX_OP > U16_MAX);
336 BUG_ON(num_ops > CEPH_OSD_MAX_OP);
337
157 msg_size = 4 + 4 + 8 + 8 + 4+8; 338 msg_size = 4 + 4 + 8 + 8 + 4+8;
158 msg_size += 2 + 4 + 8 + 4 + 4; /* oloc */ 339 msg_size += 2 + 4 + 8 + 4 + 4; /* oloc */
159 msg_size += 1 + 8 + 4 + 4; /* pg_t */ 340 msg_size += 1 + 8 + 4 + 4; /* pg_t */
@@ -168,13 +349,14 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
168 req = mempool_alloc(osdc->req_mempool, gfp_flags); 349 req = mempool_alloc(osdc->req_mempool, gfp_flags);
169 memset(req, 0, sizeof(*req)); 350 memset(req, 0, sizeof(*req));
170 } else { 351 } else {
171 req = kzalloc(sizeof(*req), gfp_flags); 352 req = kmem_cache_zalloc(ceph_osd_request_cache, gfp_flags);
172 } 353 }
173 if (req == NULL) 354 if (req == NULL)
174 return NULL; 355 return NULL;
175 356
176 req->r_osdc = osdc; 357 req->r_osdc = osdc;
177 req->r_mempool = use_mempool; 358 req->r_mempool = use_mempool;
359 req->r_num_ops = num_ops;
178 360
179 kref_init(&req->r_kref); 361 kref_init(&req->r_kref);
180 init_completion(&req->r_completion); 362 init_completion(&req->r_completion);
@@ -198,8 +380,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
198 } 380 }
199 req->r_reply = msg; 381 req->r_reply = msg;
200 382
201 ceph_pagelist_init(&req->r_trail);
202
203 /* create request message; allow space for oid */ 383 /* create request message; allow space for oid */
204 if (use_mempool) 384 if (use_mempool)
205 msg = ceph_msgpool_get(&osdc->msgpool_op, 0); 385 msg = ceph_msgpool_get(&osdc->msgpool_op, 0);
@@ -218,60 +398,24 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
218} 398}
219EXPORT_SYMBOL(ceph_osdc_alloc_request); 399EXPORT_SYMBOL(ceph_osdc_alloc_request);
220 400
221static void osd_req_encode_op(struct ceph_osd_request *req, 401static bool osd_req_opcode_valid(u16 opcode)
222 struct ceph_osd_op *dst,
223 struct ceph_osd_req_op *src)
224{ 402{
225 dst->op = cpu_to_le16(src->op); 403 switch (opcode) {
226
227 switch (src->op) {
228 case CEPH_OSD_OP_STAT:
229 break;
230 case CEPH_OSD_OP_READ: 404 case CEPH_OSD_OP_READ:
231 case CEPH_OSD_OP_WRITE: 405 case CEPH_OSD_OP_STAT:
232 dst->extent.offset =
233 cpu_to_le64(src->extent.offset);
234 dst->extent.length =
235 cpu_to_le64(src->extent.length);
236 dst->extent.truncate_size =
237 cpu_to_le64(src->extent.truncate_size);
238 dst->extent.truncate_seq =
239 cpu_to_le32(src->extent.truncate_seq);
240 break;
241 case CEPH_OSD_OP_CALL:
242 dst->cls.class_len = src->cls.class_len;
243 dst->cls.method_len = src->cls.method_len;
244 dst->cls.indata_len = cpu_to_le32(src->cls.indata_len);
245
246 ceph_pagelist_append(&req->r_trail, src->cls.class_name,
247 src->cls.class_len);
248 ceph_pagelist_append(&req->r_trail, src->cls.method_name,
249 src->cls.method_len);
250 ceph_pagelist_append(&req->r_trail, src->cls.indata,
251 src->cls.indata_len);
252 break;
253 case CEPH_OSD_OP_STARTSYNC:
254 break;
255 case CEPH_OSD_OP_NOTIFY_ACK:
256 case CEPH_OSD_OP_WATCH:
257 dst->watch.cookie = cpu_to_le64(src->watch.cookie);
258 dst->watch.ver = cpu_to_le64(src->watch.ver);
259 dst->watch.flag = src->watch.flag;
260 break;
261 default:
262 pr_err("unrecognized osd opcode %d\n", dst->op);
263 WARN_ON(1);
264 break;
265 case CEPH_OSD_OP_MAPEXT: 406 case CEPH_OSD_OP_MAPEXT:
266 case CEPH_OSD_OP_MASKTRUNC: 407 case CEPH_OSD_OP_MASKTRUNC:
267 case CEPH_OSD_OP_SPARSE_READ: 408 case CEPH_OSD_OP_SPARSE_READ:
268 case CEPH_OSD_OP_NOTIFY: 409 case CEPH_OSD_OP_NOTIFY:
410 case CEPH_OSD_OP_NOTIFY_ACK:
269 case CEPH_OSD_OP_ASSERT_VER: 411 case CEPH_OSD_OP_ASSERT_VER:
412 case CEPH_OSD_OP_WRITE:
270 case CEPH_OSD_OP_WRITEFULL: 413 case CEPH_OSD_OP_WRITEFULL:
271 case CEPH_OSD_OP_TRUNCATE: 414 case CEPH_OSD_OP_TRUNCATE:
272 case CEPH_OSD_OP_ZERO: 415 case CEPH_OSD_OP_ZERO:
273 case CEPH_OSD_OP_DELETE: 416 case CEPH_OSD_OP_DELETE:
274 case CEPH_OSD_OP_APPEND: 417 case CEPH_OSD_OP_APPEND:
418 case CEPH_OSD_OP_STARTSYNC:
275 case CEPH_OSD_OP_SETTRUNC: 419 case CEPH_OSD_OP_SETTRUNC:
276 case CEPH_OSD_OP_TRIMTRUNC: 420 case CEPH_OSD_OP_TRIMTRUNC:
277 case CEPH_OSD_OP_TMAPUP: 421 case CEPH_OSD_OP_TMAPUP:
@@ -279,11 +423,11 @@ static void osd_req_encode_op(struct ceph_osd_request *req,
279 case CEPH_OSD_OP_TMAPGET: 423 case CEPH_OSD_OP_TMAPGET:
280 case CEPH_OSD_OP_CREATE: 424 case CEPH_OSD_OP_CREATE:
281 case CEPH_OSD_OP_ROLLBACK: 425 case CEPH_OSD_OP_ROLLBACK:
426 case CEPH_OSD_OP_WATCH:
282 case CEPH_OSD_OP_OMAPGETKEYS: 427 case CEPH_OSD_OP_OMAPGETKEYS:
283 case CEPH_OSD_OP_OMAPGETVALS: 428 case CEPH_OSD_OP_OMAPGETVALS:
284 case CEPH_OSD_OP_OMAPGETHEADER: 429 case CEPH_OSD_OP_OMAPGETHEADER:
285 case CEPH_OSD_OP_OMAPGETVALSBYKEYS: 430 case CEPH_OSD_OP_OMAPGETVALSBYKEYS:
286 case CEPH_OSD_OP_MODE_RD:
287 case CEPH_OSD_OP_OMAPSETVALS: 431 case CEPH_OSD_OP_OMAPSETVALS:
288 case CEPH_OSD_OP_OMAPSETHEADER: 432 case CEPH_OSD_OP_OMAPSETHEADER:
289 case CEPH_OSD_OP_OMAPCLEAR: 433 case CEPH_OSD_OP_OMAPCLEAR:
@@ -314,113 +458,233 @@ static void osd_req_encode_op(struct ceph_osd_request *req,
314 case CEPH_OSD_OP_RDUNLOCK: 458 case CEPH_OSD_OP_RDUNLOCK:
315 case CEPH_OSD_OP_UPLOCK: 459 case CEPH_OSD_OP_UPLOCK:
316 case CEPH_OSD_OP_DNLOCK: 460 case CEPH_OSD_OP_DNLOCK:
461 case CEPH_OSD_OP_CALL:
317 case CEPH_OSD_OP_PGLS: 462 case CEPH_OSD_OP_PGLS:
318 case CEPH_OSD_OP_PGLS_FILTER: 463 case CEPH_OSD_OP_PGLS_FILTER:
319 pr_err("unsupported osd opcode %s\n", 464 return true;
320 ceph_osd_op_name(dst->op)); 465 default:
321 WARN_ON(1); 466 return false;
322 break;
323 } 467 }
324 dst->payload_len = cpu_to_le32(src->payload_len);
325} 468}
326 469
327/* 470/*
328 * build new request AND message 471 * This is an osd op init function for opcodes that have no data or
329 * 472 * other information associated with them. It also serves as a
473 * common init routine for all the other init functions, below.
330 */ 474 */
331void ceph_osdc_build_request(struct ceph_osd_request *req, 475static struct ceph_osd_req_op *
332 u64 off, u64 len, unsigned int num_ops, 476_osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which,
333 struct ceph_osd_req_op *src_ops, 477 u16 opcode)
334 struct ceph_snap_context *snapc, u64 snap_id,
335 struct timespec *mtime)
336{ 478{
337 struct ceph_msg *msg = req->r_request; 479 struct ceph_osd_req_op *op;
338 struct ceph_osd_req_op *src_op;
339 void *p;
340 size_t msg_size;
341 int flags = req->r_flags;
342 u64 data_len;
343 int i;
344 480
345 req->r_num_ops = num_ops; 481 BUG_ON(which >= osd_req->r_num_ops);
346 req->r_snapid = snap_id; 482 BUG_ON(!osd_req_opcode_valid(opcode));
347 req->r_snapc = ceph_get_snap_context(snapc);
348 483
349 /* encode request */ 484 op = &osd_req->r_ops[which];
350 msg->hdr.version = cpu_to_le16(4); 485 memset(op, 0, sizeof (*op));
486 op->op = opcode;
351 487
352 p = msg->front.iov_base; 488 return op;
353 ceph_encode_32(&p, 1); /* client_inc is always 1 */ 489}
354 req->r_request_osdmap_epoch = p;
355 p += 4;
356 req->r_request_flags = p;
357 p += 4;
358 if (req->r_flags & CEPH_OSD_FLAG_WRITE)
359 ceph_encode_timespec(p, mtime);
360 p += sizeof(struct ceph_timespec);
361 req->r_request_reassert_version = p;
362 p += sizeof(struct ceph_eversion); /* will get filled in */
363 490
364 /* oloc */ 491void osd_req_op_init(struct ceph_osd_request *osd_req,
365 ceph_encode_8(&p, 4); 492 unsigned int which, u16 opcode)
366 ceph_encode_8(&p, 4); 493{
367 ceph_encode_32(&p, 8 + 4 + 4); 494 (void)_osd_req_op_init(osd_req, which, opcode);
368 req->r_request_pool = p; 495}
369 p += 8; 496EXPORT_SYMBOL(osd_req_op_init);
370 ceph_encode_32(&p, -1); /* preferred */
371 ceph_encode_32(&p, 0); /* key len */
372 497
373 ceph_encode_8(&p, 1); 498void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
374 req->r_request_pgid = p; 499 unsigned int which, u16 opcode,
375 p += 8 + 4; 500 u64 offset, u64 length,
376 ceph_encode_32(&p, -1); /* preferred */ 501 u64 truncate_size, u32 truncate_seq)
502{
503 struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode);
504 size_t payload_len = 0;
377 505
378 /* oid */ 506 BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE);
379 ceph_encode_32(&p, req->r_oid_len);
380 memcpy(p, req->r_oid, req->r_oid_len);
381 dout("oid '%.*s' len %d\n", req->r_oid_len, req->r_oid, req->r_oid_len);
382 p += req->r_oid_len;
383 507
384 /* ops */ 508 op->extent.offset = offset;
385 ceph_encode_16(&p, num_ops); 509 op->extent.length = length;
386 src_op = src_ops; 510 op->extent.truncate_size = truncate_size;
387 req->r_request_ops = p; 511 op->extent.truncate_seq = truncate_seq;
388 for (i = 0; i < num_ops; i++, src_op++) { 512 if (opcode == CEPH_OSD_OP_WRITE)
389 osd_req_encode_op(req, p, src_op); 513 payload_len += length;
390 p += sizeof(struct ceph_osd_op);
391 }
392 514
393 /* snaps */ 515 op->payload_len = payload_len;
394 ceph_encode_64(&p, req->r_snapid); 516}
395 ceph_encode_64(&p, req->r_snapc ? req->r_snapc->seq : 0); 517EXPORT_SYMBOL(osd_req_op_extent_init);
396 ceph_encode_32(&p, req->r_snapc ? req->r_snapc->num_snaps : 0); 518
397 if (req->r_snapc) { 519void osd_req_op_extent_update(struct ceph_osd_request *osd_req,
398 for (i = 0; i < snapc->num_snaps; i++) { 520 unsigned int which, u64 length)
399 ceph_encode_64(&p, req->r_snapc->snaps[i]); 521{
400 } 522 struct ceph_osd_req_op *op;
523 u64 previous;
524
525 BUG_ON(which >= osd_req->r_num_ops);
526 op = &osd_req->r_ops[which];
527 previous = op->extent.length;
528
529 if (length == previous)
530 return; /* Nothing to do */
531 BUG_ON(length > previous);
532
533 op->extent.length = length;
534 op->payload_len -= previous - length;
535}
536EXPORT_SYMBOL(osd_req_op_extent_update);
537
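osd_req_op_extent_update() only ever shrinks an op, since growing it would invalidate payload_len; a usage sketch with assumed values:

static void shorten_write(struct ceph_osd_request *req,
			  u64 off, u64 len, u64 done)
{
	osd_req_op_extent_init(req, 0, CEPH_OSD_OP_WRITE, off, len, 0, 0);
	/* the caller later learns only 'done' of 'len' bytes apply */
	osd_req_op_extent_update(req, 0, done);	/* done <= len, else BUG */
}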
538void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
539 u16 opcode, const char *class, const char *method)
540{
541 struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode);
542 struct ceph_pagelist *pagelist;
543 size_t payload_len = 0;
544 size_t size;
545
546 BUG_ON(opcode != CEPH_OSD_OP_CALL);
547
548 pagelist = kmalloc(sizeof (*pagelist), GFP_NOFS);
549 BUG_ON(!pagelist);
550 ceph_pagelist_init(pagelist);
551
552 op->cls.class_name = class;
553 size = strlen(class);
554 BUG_ON(size > (size_t) U8_MAX);
555 op->cls.class_len = size;
556 ceph_pagelist_append(pagelist, class, size);
557 payload_len += size;
558
559 op->cls.method_name = method;
560 size = strlen(method);
561 BUG_ON(size > (size_t) U8_MAX);
562 op->cls.method_len = size;
563 ceph_pagelist_append(pagelist, method, size);
564 payload_len += size;
565
566 osd_req_op_cls_request_info_pagelist(osd_req, which, pagelist);
567
568 op->cls.argc = 0; /* currently unused */
569
570 op->payload_len = payload_len;
571}
572EXPORT_SYMBOL(osd_req_op_cls_init);
573
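A usage sketch for a class-method call (the class and method names are hypothetical; rbd follows this same pattern):

static void setup_class_call(struct ceph_osd_request *req,
			     struct page **reply_pages, u64 reply_len)
{
	osd_req_op_cls_init(req, 0, CEPH_OSD_OP_CALL, "rbd", "get_size");
	/* tell the request where the method's reply bytes should land */
	osd_req_op_cls_response_data_pages(req, 0, reply_pages,
					   reply_len, 0, false, false);
}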
574void osd_req_op_watch_init(struct ceph_osd_request *osd_req,
575 unsigned int which, u16 opcode,
576 u64 cookie, u64 version, int flag)
577{
578 struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode);
579
580 BUG_ON(opcode != CEPH_OSD_OP_NOTIFY_ACK && opcode != CEPH_OSD_OP_WATCH);
581
582 op->watch.cookie = cookie;
583 op->watch.ver = version;
584 if (opcode == CEPH_OSD_OP_WATCH && flag)
585 op->watch.flag = (u8)1;
586}
587EXPORT_SYMBOL(osd_req_op_watch_init);
588
589static void ceph_osdc_msg_data_add(struct ceph_msg *msg,
590 struct ceph_osd_data *osd_data)
591{
592 u64 length = ceph_osd_data_length(osd_data);
593
594 if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES) {
595 BUG_ON(length > (u64) SIZE_MAX);
596 if (length)
597 ceph_msg_data_add_pages(msg, osd_data->pages,
598 length, osd_data->alignment);
599 } else if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGELIST) {
600 BUG_ON(!length);
601 ceph_msg_data_add_pagelist(msg, osd_data->pagelist);
602#ifdef CONFIG_BLOCK
603 } else if (osd_data->type == CEPH_OSD_DATA_TYPE_BIO) {
604 ceph_msg_data_add_bio(msg, osd_data->bio, length);
605#endif
606 } else {
607 BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_NONE);
401 } 608 }
609}
402 610
403 req->r_request_attempts = p; 611static u64 osd_req_encode_op(struct ceph_osd_request *req,
404 p += 4; 612 struct ceph_osd_op *dst, unsigned int which)
613{
614 struct ceph_osd_req_op *src;
615 struct ceph_osd_data *osd_data;
616 u64 request_data_len = 0;
617 u64 data_length;
405 618
406 data_len = req->r_trail.length; 619 BUG_ON(which >= req->r_num_ops);
407 if (flags & CEPH_OSD_FLAG_WRITE) { 620 src = &req->r_ops[which];
408 req->r_request->hdr.data_off = cpu_to_le16(off); 621 if (WARN_ON(!osd_req_opcode_valid(src->op))) {
409 data_len += len; 622 pr_err("unrecognized osd opcode %d\n", src->op);
623
624 return 0;
410 } 625 }
411 req->r_request->hdr.data_len = cpu_to_le32(data_len);
412 req->r_request->page_alignment = req->r_page_alignment;
413 626
414 BUG_ON(p > msg->front.iov_base + msg->front.iov_len); 627 switch (src->op) {
415 msg_size = p - msg->front.iov_base; 628 case CEPH_OSD_OP_STAT:
416 msg->front.iov_len = msg_size; 629 osd_data = &src->raw_data_in;
417 msg->hdr.front_len = cpu_to_le32(msg_size); 630 ceph_osdc_msg_data_add(req->r_reply, osd_data);
631 break;
632 case CEPH_OSD_OP_READ:
633 case CEPH_OSD_OP_WRITE:
634 if (src->op == CEPH_OSD_OP_WRITE)
635 request_data_len = src->extent.length;
636 dst->extent.offset = cpu_to_le64(src->extent.offset);
637 dst->extent.length = cpu_to_le64(src->extent.length);
638 dst->extent.truncate_size =
639 cpu_to_le64(src->extent.truncate_size);
640 dst->extent.truncate_seq =
641 cpu_to_le32(src->extent.truncate_seq);
642 osd_data = &src->extent.osd_data;
643 if (src->op == CEPH_OSD_OP_WRITE)
644 ceph_osdc_msg_data_add(req->r_request, osd_data);
645 else
646 ceph_osdc_msg_data_add(req->r_reply, osd_data);
647 break;
648 case CEPH_OSD_OP_CALL:
649 dst->cls.class_len = src->cls.class_len;
650 dst->cls.method_len = src->cls.method_len;
651 osd_data = &src->cls.request_info;
652 ceph_osdc_msg_data_add(req->r_request, osd_data);
653 BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_PAGELIST);
654 request_data_len = osd_data->pagelist->length;
655
656 osd_data = &src->cls.request_data;
657 data_length = ceph_osd_data_length(osd_data);
658 if (data_length) {
659 BUG_ON(osd_data->type == CEPH_OSD_DATA_TYPE_NONE);
660 dst->cls.indata_len = cpu_to_le32(data_length);
661 ceph_osdc_msg_data_add(req->r_request, osd_data);
662 src->payload_len += data_length;
663 request_data_len += data_length;
664 }
665 osd_data = &src->cls.response_data;
666 ceph_osdc_msg_data_add(req->r_reply, osd_data);
667 break;
668 case CEPH_OSD_OP_STARTSYNC:
669 break;
670 case CEPH_OSD_OP_NOTIFY_ACK:
671 case CEPH_OSD_OP_WATCH:
672 dst->watch.cookie = cpu_to_le64(src->watch.cookie);
673 dst->watch.ver = cpu_to_le64(src->watch.ver);
674 dst->watch.flag = src->watch.flag;
675 break;
676 default:
677 pr_err("unsupported osd opcode %s\n",
678 ceph_osd_op_name(src->op));
679 WARN_ON(1);
418 680
419 dout("build_request msg_size was %d num_ops %d\n", (int)msg_size, 681 return 0;
420 num_ops); 682 }
421 return; 683 dst->op = cpu_to_le16(src->op);
684 dst->payload_len = cpu_to_le32(src->payload_len);
685
686 return request_data_len;
422} 687}
423EXPORT_SYMBOL(ceph_osdc_build_request);
424 688
425/* 689/*
426 * build new request AND message, calculate layout, and adjust file 690 * build new request AND message, calculate layout, and adjust file
@@ -436,51 +700,63 @@ EXPORT_SYMBOL(ceph_osdc_build_request);
436struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, 700struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
437 struct ceph_file_layout *layout, 701 struct ceph_file_layout *layout,
438 struct ceph_vino vino, 702 struct ceph_vino vino,
439 u64 off, u64 *plen, 703 u64 off, u64 *plen, int num_ops,
440 int opcode, int flags, 704 int opcode, int flags,
441 struct ceph_snap_context *snapc, 705 struct ceph_snap_context *snapc,
442 int do_sync,
443 u32 truncate_seq, 706 u32 truncate_seq,
444 u64 truncate_size, 707 u64 truncate_size,
445 struct timespec *mtime, 708 bool use_mempool)
446 bool use_mempool,
447 int page_align)
448{ 709{
449 struct ceph_osd_req_op ops[2];
450 struct ceph_osd_request *req; 710 struct ceph_osd_request *req;
451 unsigned int num_op = 1; 711 u64 objnum = 0;
712 u64 objoff = 0;
713 u64 objlen = 0;
714 u32 object_size;
715 u64 object_base;
452 int r; 716 int r;
453 717
454 memset(&ops, 0, sizeof ops); 718 BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE);
455
456 ops[0].op = opcode;
457 ops[0].extent.truncate_seq = truncate_seq;
458 ops[0].extent.truncate_size = truncate_size;
459
460 if (do_sync) {
461 ops[1].op = CEPH_OSD_OP_STARTSYNC;
462 num_op++;
463 }
464 719
465 req = ceph_osdc_alloc_request(osdc, snapc, num_op, use_mempool, 720 req = ceph_osdc_alloc_request(osdc, snapc, num_ops, use_mempool,
466 GFP_NOFS); 721 GFP_NOFS);
467 if (!req) 722 if (!req)
468 return ERR_PTR(-ENOMEM); 723 return ERR_PTR(-ENOMEM);
724
469 req->r_flags = flags; 725 req->r_flags = flags;
470 726
471 /* calculate max write size */ 727 /* calculate max write size */
472 r = calc_layout(vino, layout, off, plen, req, ops); 728 r = calc_layout(layout, off, plen, &objnum, &objoff, &objlen);
473 if (r < 0) 729 if (r < 0) {
730 ceph_osdc_put_request(req);
474 return ERR_PTR(r); 731 return ERR_PTR(r);
475 req->r_file_layout = *layout; /* keep a copy */ 732 }
476 733
477 /* in case it differs from natural (file) alignment that 734 object_size = le32_to_cpu(layout->fl_object_size);
478 calc_layout filled in for us */ 735 object_base = off - objoff;
479 req->r_num_pages = calc_pages_for(page_align, *plen); 736 if (truncate_size <= object_base) {
480 req->r_page_alignment = page_align; 737 truncate_size = 0;
738 } else {
739 truncate_size -= object_base;
740 if (truncate_size > object_size)
741 truncate_size = object_size;
742 }
743
744 osd_req_op_extent_init(req, 0, opcode, objoff, objlen,
745 truncate_size, truncate_seq);
746
747 /*
748 * A second op in the ops array means the caller wants to
749	 * also include a 'startsync' command so that the
750 * osd will flush data quickly.
751 */
752 if (num_ops > 1)
753 osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC);
754
755 req->r_file_layout = *layout; /* keep a copy */
481 756
482 ceph_osdc_build_request(req, off, *plen, num_op, ops, 757 snprintf(req->r_oid, sizeof(req->r_oid), "%llx.%08llx",
483 snapc, vino.snap, mtime); 758 vino.ino, objnum);
759 req->r_oid_len = strlen(req->r_oid);
484 760
485 return req; 761 return req;
486} 762}
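The truncate_size conversion above maps a file-relative value into object-relative terms; a worked example assuming 4 MiB objects:

	/*
	 * off = 6 MiB lands at objoff = 2 MiB within object #1, so
	 * object_base = off - objoff = 4 MiB. A file truncate_size of
	 * 5 MiB becomes 5 - 4 = 1 MiB within this object; any value
	 * <= 4 MiB clamps to 0, and any value >= 8 MiB clamps to the
	 * 4 MiB object size.
	 */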
@@ -558,21 +834,46 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc,
558 struct ceph_osd *osd) 834 struct ceph_osd *osd)
559{ 835{
560 struct ceph_osd_request *req, *nreq; 836 struct ceph_osd_request *req, *nreq;
837 LIST_HEAD(resend);
561 int err; 838 int err;
562 839
563 dout("__kick_osd_requests osd%d\n", osd->o_osd); 840 dout("__kick_osd_requests osd%d\n", osd->o_osd);
564 err = __reset_osd(osdc, osd); 841 err = __reset_osd(osdc, osd);
565 if (err) 842 if (err)
566 return; 843 return;
567 844 /*
845 * Build up a list of requests to resend by traversing the
846 * osd's list of requests. Requests for a given object are
847 * sent in tid order, and that is also the order they're
848 * kept on this list. Therefore all requests that are in
849 * flight will be found first, followed by all requests that
850 * have not yet been sent. And to resend requests while
851 * preserving this order we will want to put any sent
852 * requests back on the front of the osd client's unsent
853 * list.
854 *
855 * So we build a separate ordered list of already-sent
856 * requests for the affected osd and splice it onto the
857 * front of the osd client's unsent list. Once we've seen a
858 * request that has not yet been sent we're done. Those
859 * requests are already sitting right where they belong.
860 */
568 list_for_each_entry(req, &osd->o_requests, r_osd_item) { 861 list_for_each_entry(req, &osd->o_requests, r_osd_item) {
569 list_move(&req->r_req_lru_item, &osdc->req_unsent); 862 if (!req->r_sent)
570 dout("requeued %p tid %llu osd%d\n", req, req->r_tid, 863 break;
864 list_move_tail(&req->r_req_lru_item, &resend);
865 dout("requeueing %p tid %llu osd%d\n", req, req->r_tid,
571 osd->o_osd); 866 osd->o_osd);
572 if (!req->r_linger) 867 if (!req->r_linger)
573 req->r_flags |= CEPH_OSD_FLAG_RETRY; 868 req->r_flags |= CEPH_OSD_FLAG_RETRY;
574 } 869 }
870 list_splice(&resend, &osdc->req_unsent);
575 871
872 /*
873 * Linger requests are re-registered before sending, which
874 * sets up a new tid for each. We add them to the unsent
875 * list at the end to keep things in tid order.
876 */
576 list_for_each_entry_safe(req, nreq, &osd->o_linger_requests, 877 list_for_each_entry_safe(req, nreq, &osd->o_linger_requests,
577 r_linger_osd) { 878 r_linger_osd) {
578 /* 879 /*
@@ -581,8 +882,8 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc,
581 */ 882 */
582 BUG_ON(!list_empty(&req->r_req_lru_item)); 883 BUG_ON(!list_empty(&req->r_req_lru_item));
583 __register_request(osdc, req); 884 __register_request(osdc, req);
584 list_add(&req->r_req_lru_item, &osdc->req_unsent); 885 list_add_tail(&req->r_req_lru_item, &osdc->req_unsent);
585 list_add(&req->r_osd_item, &req->r_osd->o_requests); 886 list_add_tail(&req->r_osd_item, &req->r_osd->o_requests);
586 __unregister_linger_request(osdc, req); 887 __unregister_linger_request(osdc, req);
587 dout("requeued lingering %p tid %llu osd%d\n", req, req->r_tid, 888 dout("requeued lingering %p tid %llu osd%d\n", req, req->r_tid,
588 osd->o_osd); 889 osd->o_osd);
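A worked example of the ordering described in the comment above (tids assumed):

	/*
	 * osd->o_requests holds tids 5, 6, 7, 8 in tid order; 5 and 6
	 * are in flight (r_sent != 0) while 7 and 8 still sit on the
	 * unsent list. The loop moves 5 then 6 onto 'resend' and breaks
	 * at 7; list_splice() puts them at the front, so req_unsent
	 * reads [5, 6, 7, 8] -- tid order preserved without sorting.
	 */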
@@ -654,8 +955,7 @@ static void put_osd(struct ceph_osd *osd)
654 if (atomic_dec_and_test(&osd->o_ref) && osd->o_auth.authorizer) { 955 if (atomic_dec_and_test(&osd->o_ref) && osd->o_auth.authorizer) {
655 struct ceph_auth_client *ac = osd->o_osdc->client->monc.auth; 956 struct ceph_auth_client *ac = osd->o_osdc->client->monc.auth;
656 957
657 if (ac->ops && ac->ops->destroy_authorizer) 958 ceph_auth_destroy_authorizer(ac, osd->o_auth.authorizer);
658 ac->ops->destroy_authorizer(ac, osd->o_auth.authorizer);
659 kfree(osd); 959 kfree(osd);
660 } 960 }
661} 961}
@@ -820,14 +1120,6 @@ static void __register_request(struct ceph_osd_client *osdc,
820 } 1120 }
821} 1121}
822 1122
823static void register_request(struct ceph_osd_client *osdc,
824 struct ceph_osd_request *req)
825{
826 mutex_lock(&osdc->request_mutex);
827 __register_request(osdc, req);
828 mutex_unlock(&osdc->request_mutex);
829}
830
831/* 1123/*
832 * called under osdc->request_mutex 1124 * called under osdc->request_mutex
833 */ 1125 */
@@ -912,6 +1204,7 @@ void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc,
912 mutex_lock(&osdc->request_mutex); 1204 mutex_lock(&osdc->request_mutex);
913 if (req->r_linger) { 1205 if (req->r_linger) {
914 __unregister_linger_request(osdc, req); 1206 __unregister_linger_request(osdc, req);
1207 req->r_linger = 0;
915 ceph_osdc_put_request(req); 1208 ceph_osdc_put_request(req);
916 } 1209 }
917 mutex_unlock(&osdc->request_mutex); 1210 mutex_unlock(&osdc->request_mutex);
@@ -952,8 +1245,8 @@ static int __map_request(struct ceph_osd_client *osdc,
952 int err; 1245 int err;
953 1246
954 dout("map_request %p tid %lld\n", req, req->r_tid); 1247 dout("map_request %p tid %lld\n", req, req->r_tid);
955 err = ceph_calc_object_layout(&pgid, req->r_oid, 1248 err = ceph_calc_ceph_pg(&pgid, req->r_oid, osdc->osdmap,
956 &req->r_file_layout, osdc->osdmap); 1249 ceph_file_layout_pg_pool(req->r_file_layout));
957 if (err) { 1250 if (err) {
958 list_move(&req->r_req_lru_item, &osdc->req_notarget); 1251 list_move(&req->r_req_lru_item, &osdc->req_notarget);
959 return err; 1252 return err;
@@ -1007,10 +1300,10 @@ static int __map_request(struct ceph_osd_client *osdc,
1007 1300
1008 if (req->r_osd) { 1301 if (req->r_osd) {
1009 __remove_osd_from_lru(req->r_osd); 1302 __remove_osd_from_lru(req->r_osd);
1010 list_add(&req->r_osd_item, &req->r_osd->o_requests); 1303 list_add_tail(&req->r_osd_item, &req->r_osd->o_requests);
1011 list_move(&req->r_req_lru_item, &osdc->req_unsent); 1304 list_move_tail(&req->r_req_lru_item, &osdc->req_unsent);
1012 } else { 1305 } else {
1013 list_move(&req->r_req_lru_item, &osdc->req_notarget); 1306 list_move_tail(&req->r_req_lru_item, &osdc->req_notarget);
1014 } 1307 }
1015 err = 1; /* osd or pg changed */ 1308 err = 1; /* osd or pg changed */
1016 1309
@@ -1045,8 +1338,14 @@ static void __send_request(struct ceph_osd_client *osdc,
1045 list_move_tail(&req->r_req_lru_item, &osdc->req_lru); 1338 list_move_tail(&req->r_req_lru_item, &osdc->req_lru);
1046 1339
1047 ceph_msg_get(req->r_request); /* send consumes a ref */ 1340 ceph_msg_get(req->r_request); /* send consumes a ref */
1048 ceph_con_send(&req->r_osd->o_con, req->r_request); 1341
1342	/* Mark the request unsafe if this is the first time it's being sent. */
1343
1344 if (!req->r_sent && req->r_unsafe_callback)
1345 req->r_unsafe_callback(req, true);
1049 req->r_sent = req->r_osd->o_incarnation; 1346 req->r_sent = req->r_osd->o_incarnation;
1347
1348 ceph_con_send(&req->r_osd->o_con, req->r_request);
1050} 1349}
1051 1350
1052/* 1351/*
@@ -1134,31 +1433,11 @@ static void handle_osds_timeout(struct work_struct *work)
1134 1433
1135static void complete_request(struct ceph_osd_request *req) 1434static void complete_request(struct ceph_osd_request *req)
1136{ 1435{
1137 if (req->r_safe_callback) 1436 if (req->r_unsafe_callback)
1138 req->r_safe_callback(req, NULL); 1437 req->r_unsafe_callback(req, false);
1139 complete_all(&req->r_safe_completion); /* fsync waiter */ 1438 complete_all(&req->r_safe_completion); /* fsync waiter */
1140} 1439}
1141 1440
1142static int __decode_pgid(void **p, void *end, struct ceph_pg *pgid)
1143{
1144 __u8 v;
1145
1146 ceph_decode_need(p, end, 1 + 8 + 4 + 4, bad);
1147 v = ceph_decode_8(p);
1148 if (v > 1) {
1149 pr_warning("do not understand pg encoding %d > 1", v);
1150 return -EINVAL;
1151 }
1152 pgid->pool = ceph_decode_64(p);
1153 pgid->seed = ceph_decode_32(p);
1154 *p += 4;
1155 return 0;
1156
1157bad:
1158 pr_warning("incomplete pg encoding");
1159 return -EINVAL;
1160}
1161
1162/* 1441/*
1163 * handle osd op reply. either call the callback if it is specified, 1442 * handle osd op reply. either call the callback if it is specified,
1164 * or do the completion to wake up the waiting thread. 1443 * or do the completion to wake up the waiting thread.
@@ -1170,7 +1449,8 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
1170 struct ceph_osd_request *req; 1449 struct ceph_osd_request *req;
1171 u64 tid; 1450 u64 tid;
1172 int object_len; 1451 int object_len;
1173 int numops, payload_len, flags; 1452 unsigned int numops;
1453 int payload_len, flags;
1174 s32 result; 1454 s32 result;
1175 s32 retry_attempt; 1455 s32 retry_attempt;
1176 struct ceph_pg pg; 1456 struct ceph_pg pg;
@@ -1178,7 +1458,9 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
1178 u32 reassert_epoch; 1458 u32 reassert_epoch;
1179 u64 reassert_version; 1459 u64 reassert_version;
1180 u32 osdmap_epoch; 1460 u32 osdmap_epoch;
1181 int i; 1461 int already_completed;
1462 u32 bytes;
1463 unsigned int i;
1182 1464
1183 tid = le64_to_cpu(msg->hdr.tid); 1465 tid = le64_to_cpu(msg->hdr.tid);
1184 dout("handle_reply %p tid %llu\n", msg, tid); 1466 dout("handle_reply %p tid %llu\n", msg, tid);
@@ -1191,7 +1473,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
1191 ceph_decode_need(&p, end, object_len, bad); 1473 ceph_decode_need(&p, end, object_len, bad);
1192 p += object_len; 1474 p += object_len;
1193 1475
1194 err = __decode_pgid(&p, end, &pg); 1476 err = ceph_decode_pgid(&p, end, &pg);
1195 if (err) 1477 if (err)
1196 goto bad; 1478 goto bad;
1197 1479
@@ -1207,8 +1489,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
1207 req = __lookup_request(osdc, tid); 1489 req = __lookup_request(osdc, tid);
1208 if (req == NULL) { 1490 if (req == NULL) {
1209 dout("handle_reply tid %llu dne\n", tid); 1491 dout("handle_reply tid %llu dne\n", tid);
1210 mutex_unlock(&osdc->request_mutex); 1492 goto bad_mutex;
1211 return;
1212 } 1493 }
1213 ceph_osdc_get_request(req); 1494 ceph_osdc_get_request(req);
1214 1495
@@ -1233,9 +1514,10 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
1233 payload_len += len; 1514 payload_len += len;
1234 p += sizeof(*op); 1515 p += sizeof(*op);
1235 } 1516 }
1236 if (payload_len != le32_to_cpu(msg->hdr.data_len)) { 1517 bytes = le32_to_cpu(msg->hdr.data_len);
1518 if (payload_len != bytes) {
1237 pr_warning("sum of op payload lens %d != data_len %d", 1519 pr_warning("sum of op payload lens %d != data_len %d",
1238 payload_len, le32_to_cpu(msg->hdr.data_len)); 1520 payload_len, bytes);
1239 goto bad_put; 1521 goto bad_put;
1240 } 1522 }
1241 1523
@@ -1244,21 +1526,9 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
1244 for (i = 0; i < numops; i++) 1526 for (i = 0; i < numops; i++)
1245 req->r_reply_op_result[i] = ceph_decode_32(&p); 1527 req->r_reply_op_result[i] = ceph_decode_32(&p);
1246 1528
1247 /*
1248 * if this connection filled our message, drop our reference now, to
1249 * avoid a (safe but slower) revoke later.
1250 */
1251 if (req->r_con_filling_msg == con && req->r_reply == msg) {
1252 dout(" dropping con_filling_msg ref %p\n", con);
1253 req->r_con_filling_msg = NULL;
1254 con->ops->put(con);
1255 }
1256
1257 if (!req->r_got_reply) { 1529 if (!req->r_got_reply) {
1258 unsigned int bytes;
1259 1530
1260 req->r_result = result; 1531 req->r_result = result;
1261 bytes = le32_to_cpu(msg->hdr.data_len);
1262 dout("handle_reply result %d bytes %d\n", req->r_result, 1532 dout("handle_reply result %d bytes %d\n", req->r_result,
1263 bytes); 1533 bytes);
1264 if (req->r_result == 0) 1534 if (req->r_result == 0)
@@ -1286,7 +1556,11 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
1286 ((flags & CEPH_OSD_FLAG_WRITE) == 0)) 1556 ((flags & CEPH_OSD_FLAG_WRITE) == 0))
1287 __unregister_request(osdc, req); 1557 __unregister_request(osdc, req);
1288 1558
1559 already_completed = req->r_completed;
1560 req->r_completed = 1;
1289 mutex_unlock(&osdc->request_mutex); 1561 mutex_unlock(&osdc->request_mutex);
1562 if (already_completed)
1563 goto done;
1290 1564
1291 if (req->r_callback) 1565 if (req->r_callback)
1292 req->r_callback(req, msg); 1566 req->r_callback(req, msg);
@@ -1303,6 +1577,8 @@ done:
1303 1577
1304bad_put: 1578bad_put:
1305 ceph_osdc_put_request(req); 1579 ceph_osdc_put_request(req);
1580bad_mutex:
1581 mutex_unlock(&osdc->request_mutex);
1306bad: 1582bad:
1307 pr_err("corrupt osd_op_reply got %d %d\n", 1583 pr_err("corrupt osd_op_reply got %d %d\n",
1308 (int)msg->front.iov_len, le32_to_cpu(msg->hdr.front_len)); 1584 (int)msg->front.iov_len, le32_to_cpu(msg->hdr.front_len));
@@ -1736,6 +2012,104 @@ bad:
1736} 2012}
1737 2013
1738/* 2014/*
2015 * build new request AND message
2016 *
2017 */
2018void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off,
2019 struct ceph_snap_context *snapc, u64 snap_id,
2020 struct timespec *mtime)
2021{
2022 struct ceph_msg *msg = req->r_request;
2023 void *p;
2024 size_t msg_size;
2025 int flags = req->r_flags;
2026 u64 data_len;
2027 unsigned int i;
2028
2029 req->r_snapid = snap_id;
2030 req->r_snapc = ceph_get_snap_context(snapc);
2031
2032 /* encode request */
2033 msg->hdr.version = cpu_to_le16(4);
2034
2035 p = msg->front.iov_base;
2036 ceph_encode_32(&p, 1); /* client_inc is always 1 */
2037 req->r_request_osdmap_epoch = p;
2038 p += 4;
2039 req->r_request_flags = p;
2040 p += 4;
2041 if (req->r_flags & CEPH_OSD_FLAG_WRITE)
2042 ceph_encode_timespec(p, mtime);
2043 p += sizeof(struct ceph_timespec);
2044 req->r_request_reassert_version = p;
2045 p += sizeof(struct ceph_eversion); /* will get filled in */
2046
2047 /* oloc */
2048 ceph_encode_8(&p, 4);
2049 ceph_encode_8(&p, 4);
2050 ceph_encode_32(&p, 8 + 4 + 4);
2051 req->r_request_pool = p;
2052 p += 8;
2053 ceph_encode_32(&p, -1); /* preferred */
2054 ceph_encode_32(&p, 0); /* key len */
2055
2056 ceph_encode_8(&p, 1);
2057 req->r_request_pgid = p;
2058 p += 8 + 4;
2059 ceph_encode_32(&p, -1); /* preferred */
2060
2061 /* oid */
2062 ceph_encode_32(&p, req->r_oid_len);
2063 memcpy(p, req->r_oid, req->r_oid_len);
2064 dout("oid '%.*s' len %d\n", req->r_oid_len, req->r_oid, req->r_oid_len);
2065 p += req->r_oid_len;
2066
2067 /* ops--can imply data */
2068 ceph_encode_16(&p, (u16)req->r_num_ops);
2069 data_len = 0;
2070 for (i = 0; i < req->r_num_ops; i++) {
2071 data_len += osd_req_encode_op(req, p, i);
2072 p += sizeof(struct ceph_osd_op);
2073 }
2074
2075 /* snaps */
2076 ceph_encode_64(&p, req->r_snapid);
2077 ceph_encode_64(&p, req->r_snapc ? req->r_snapc->seq : 0);
2078 ceph_encode_32(&p, req->r_snapc ? req->r_snapc->num_snaps : 0);
2079 if (req->r_snapc) {
2080 for (i = 0; i < snapc->num_snaps; i++) {
2081 ceph_encode_64(&p, req->r_snapc->snaps[i]);
2082 }
2083 }
2084
2085 req->r_request_attempts = p;
2086 p += 4;
2087
2088 /* data */
2089 if (flags & CEPH_OSD_FLAG_WRITE) {
2090 u16 data_off;
2091
2092 /*
2093 * The header "data_off" is a hint to the receiver
2094 * allowing it to align received data into its
2095 * buffers such that there's no need to re-copy
2096 * it before writing it to disk (direct I/O).
2097 */
2098 data_off = (u16) (off & 0xffff);
2099 req->r_request->hdr.data_off = cpu_to_le16(data_off);
2100 }
2101 req->r_request->hdr.data_len = cpu_to_le32(data_len);
2102
2103 BUG_ON(p > msg->front.iov_base + msg->front.iov_len);
2104 msg_size = p - msg->front.iov_base;
2105 msg->front.iov_len = msg_size;
2106 msg->hdr.front_len = cpu_to_le32(msg_size);
2107
2108 dout("build_request msg_size was %d\n", (int)msg_size);
2109}
2110EXPORT_SYMBOL(ceph_osdc_build_request);
2111
2112/*
1739 * Register request, send initial attempt. 2113 * Register request, send initial attempt.
1740 */ 2114 */
1741int ceph_osdc_start_request(struct ceph_osd_client *osdc, 2115int ceph_osdc_start_request(struct ceph_osd_client *osdc,
@@ -1744,41 +2118,28 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
1744{ 2118{
1745 int rc = 0; 2119 int rc = 0;
1746 2120
1747 req->r_request->pages = req->r_pages;
1748 req->r_request->nr_pages = req->r_num_pages;
1749#ifdef CONFIG_BLOCK
1750 req->r_request->bio = req->r_bio;
1751#endif
1752 req->r_request->trail = &req->r_trail;
1753
1754 register_request(osdc, req);
1755
1756 down_read(&osdc->map_sem); 2121 down_read(&osdc->map_sem);
1757 mutex_lock(&osdc->request_mutex); 2122 mutex_lock(&osdc->request_mutex);
1758 /* 2123 __register_request(osdc, req);
1759 * a racing kick_requests() may have sent the message for us 2124 req->r_sent = 0;
1760 * while we dropped request_mutex above, so only send now if 2125 req->r_got_reply = 0;
1761 * the request still hasn't been touched yet. 2126 req->r_completed = 0;
1762 */ 2127 rc = __map_request(osdc, req, 0);
1763 if (req->r_sent == 0) { 2128 if (rc < 0) {
1764 rc = __map_request(osdc, req, 0); 2129 if (nofail) {
1765 if (rc < 0) { 2130 dout("osdc_start_request failed map, "
1766 if (nofail) { 2131 " will retry %lld\n", req->r_tid);
1767 dout("osdc_start_request failed map, " 2132 rc = 0;
1768 " will retry %lld\n", req->r_tid);
1769 rc = 0;
1770 }
1771 goto out_unlock;
1772 }
1773 if (req->r_osd == NULL) {
1774 dout("send_request %p no up osds in pg\n", req);
1775 ceph_monc_request_next_osdmap(&osdc->client->monc);
1776 } else {
1777 __send_request(osdc, req);
1778 } 2133 }
1779 rc = 0; 2134 goto out_unlock;
1780 } 2135 }
1781 2136 if (req->r_osd == NULL) {
2137 dout("send_request %p no up osds in pg\n", req);
2138 ceph_monc_request_next_osdmap(&osdc->client->monc);
2139 } else {
2140 __send_queued(osdc);
2141 }
2142 rc = 0;
1782out_unlock: 2143out_unlock:
1783 mutex_unlock(&osdc->request_mutex); 2144 mutex_unlock(&osdc->request_mutex);
1784 up_read(&osdc->map_sem); 2145 up_read(&osdc->map_sem);
@@ -1940,18 +2301,22 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc,
1940 2301
1941 dout("readpages on ino %llx.%llx on %llu~%llu\n", vino.ino, 2302 dout("readpages on ino %llx.%llx on %llu~%llu\n", vino.ino,
1942 vino.snap, off, *plen); 2303 vino.snap, off, *plen);
1943 req = ceph_osdc_new_request(osdc, layout, vino, off, plen, 2304 req = ceph_osdc_new_request(osdc, layout, vino, off, plen, 1,
1944 CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, 2305 CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
1945 NULL, 0, truncate_seq, truncate_size, NULL, 2306 NULL, truncate_seq, truncate_size,
1946 false, page_align); 2307 false);
1947 if (IS_ERR(req)) 2308 if (IS_ERR(req))
1948 return PTR_ERR(req); 2309 return PTR_ERR(req);
1949 2310
1950 /* it may be a short read due to an object boundary */ 2311 /* it may be a short read due to an object boundary */
1951 req->r_pages = pages;
1952 2312
1953 dout("readpages final extent is %llu~%llu (%d pages align %d)\n", 2313 osd_req_op_extent_osd_data_pages(req, 0,
1954 off, *plen, req->r_num_pages, page_align); 2314 pages, *plen, page_align, false, false);
2315
2316 dout("readpages final extent is %llu~%llu (%llu bytes align %d)\n",
2317 off, *plen, *plen, page_align);
2318
2319 ceph_osdc_build_request(req, off, NULL, vino.snap, NULL);
1955 2320
1956 rc = ceph_osdc_start_request(osdc, req, false); 2321 rc = ceph_osdc_start_request(osdc, req, false);
1957 if (!rc) 2322 if (!rc)
@@ -1978,20 +2343,21 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
1978 int rc = 0; 2343 int rc = 0;
1979 int page_align = off & ~PAGE_MASK; 2344 int page_align = off & ~PAGE_MASK;
1980 2345
1981 BUG_ON(vino.snap != CEPH_NOSNAP); 2346 BUG_ON(vino.snap != CEPH_NOSNAP); /* snapshots aren't writeable */
1982 req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 2347 req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 1,
1983 CEPH_OSD_OP_WRITE, 2348 CEPH_OSD_OP_WRITE,
1984 CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE, 2349 CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
1985 snapc, 0, 2350 snapc, truncate_seq, truncate_size,
1986 truncate_seq, truncate_size, mtime, 2351 true);
1987 true, page_align);
1988 if (IS_ERR(req)) 2352 if (IS_ERR(req))
1989 return PTR_ERR(req); 2353 return PTR_ERR(req);
1990 2354
1991 /* it may be a short write due to an object boundary */ 2355 /* it may be a short write due to an object boundary */
1992 req->r_pages = pages; 2356 osd_req_op_extent_osd_data_pages(req, 0, pages, len, page_align,
1993 dout("writepages %llu~%llu (%d pages)\n", off, len, 2357 false, false);
1994 req->r_num_pages); 2358 dout("writepages %llu~%llu (%llu bytes)\n", off, len, len);
2359
2360 ceph_osdc_build_request(req, off, snapc, CEPH_NOSNAP, mtime);
1995 2361
1996 rc = ceph_osdc_start_request(osdc, req, true); 2362 rc = ceph_osdc_start_request(osdc, req, true);
1997 if (!rc) 2363 if (!rc)
@@ -2005,6 +2371,26 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
2005} 2371}
2006EXPORT_SYMBOL(ceph_osdc_writepages); 2372EXPORT_SYMBOL(ceph_osdc_writepages);
2007 2373
2374int ceph_osdc_setup(void)
2375{
2376 BUG_ON(ceph_osd_request_cache);
2377 ceph_osd_request_cache = kmem_cache_create("ceph_osd_request",
2378 sizeof (struct ceph_osd_request),
2379 __alignof__(struct ceph_osd_request),
2380 0, NULL);
2381
2382 return ceph_osd_request_cache ? 0 : -ENOMEM;
2383}
2384EXPORT_SYMBOL(ceph_osdc_setup);
2385
2386void ceph_osdc_cleanup(void)
2387{
2388 BUG_ON(!ceph_osd_request_cache);
2389 kmem_cache_destroy(ceph_osd_request_cache);
2390 ceph_osd_request_cache = NULL;
2391}
2392EXPORT_SYMBOL(ceph_osdc_cleanup);
2393
2008/* 2394/*
2009 * handle incoming message 2395 * handle incoming message
2010 */ 2396 */
@@ -2064,13 +2450,10 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
2064 goto out; 2450 goto out;
2065 } 2451 }
2066 2452
2067 if (req->r_con_filling_msg) { 2453 if (req->r_reply->con)
2068 dout("%s revoking msg %p from old con %p\n", __func__, 2454 dout("%s revoking msg %p from old con %p\n", __func__,
2069 req->r_reply, req->r_con_filling_msg); 2455 req->r_reply, req->r_reply->con);
2070 ceph_msg_revoke_incoming(req->r_reply); 2456 ceph_msg_revoke_incoming(req->r_reply);
2071 req->r_con_filling_msg->ops->put(req->r_con_filling_msg);
2072 req->r_con_filling_msg = NULL;
2073 }
2074 2457
2075 if (front > req->r_reply->front.iov_len) { 2458 if (front > req->r_reply->front.iov_len) {
2076 pr_warning("get_reply front %d > preallocated %d\n", 2459 pr_warning("get_reply front %d > preallocated %d\n",
@@ -2084,26 +2467,29 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
2084 m = ceph_msg_get(req->r_reply); 2467 m = ceph_msg_get(req->r_reply);
2085 2468
2086 if (data_len > 0) { 2469 if (data_len > 0) {
2087 int want = calc_pages_for(req->r_page_alignment, data_len); 2470 struct ceph_osd_data *osd_data;
2088 2471
2089 if (req->r_pages && unlikely(req->r_num_pages < want)) { 2472 /*
2090 pr_warning("tid %lld reply has %d bytes %d pages, we" 2473 * XXX This is assuming there is only one op containing
2091 " had only %d pages ready\n", tid, data_len, 2474 * XXX page data. Probably OK for reads, but this
2092 want, req->r_num_pages); 2475 * XXX ought to be done more generally.
2093 *skip = 1; 2476 */
2094 ceph_msg_put(m); 2477 osd_data = osd_req_op_extent_osd_data(req, 0);
2095 m = NULL; 2478 if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES) {
2096 goto out; 2479 if (osd_data->pages &&
2480 unlikely(osd_data->length < data_len)) {
2481
2482 pr_warning("tid %lld reply has %d bytes "
2483 "we had only %llu bytes ready\n",
2484 tid, data_len, osd_data->length);
2485 *skip = 1;
2486 ceph_msg_put(m);
2487 m = NULL;
2488 goto out;
2489 }
2097 } 2490 }
2098 m->pages = req->r_pages;
2099 m->nr_pages = req->r_num_pages;
2100 m->page_alignment = req->r_page_alignment;
2101#ifdef CONFIG_BLOCK
2102 m->bio = req->r_bio;
2103#endif
2104 } 2491 }
2105 *skip = 0; 2492 *skip = 0;
2106 req->r_con_filling_msg = con->ops->get(con);
2107 dout("get_reply tid %lld %p\n", tid, m); 2493 dout("get_reply tid %lld %p\n", tid, m);
2108 2494
2109out: 2495out:
@@ -2168,13 +2554,17 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
2168 struct ceph_auth_handshake *auth = &o->o_auth; 2554 struct ceph_auth_handshake *auth = &o->o_auth;
2169 2555
2170 if (force_new && auth->authorizer) { 2556 if (force_new && auth->authorizer) {
2171 if (ac->ops && ac->ops->destroy_authorizer) 2557 ceph_auth_destroy_authorizer(ac, auth->authorizer);
2172 ac->ops->destroy_authorizer(ac, auth->authorizer);
2173 auth->authorizer = NULL; 2558 auth->authorizer = NULL;
2174 } 2559 }
2175 if (!auth->authorizer && ac->ops && ac->ops->create_authorizer) { 2560 if (!auth->authorizer) {
2176 int ret = ac->ops->create_authorizer(ac, CEPH_ENTITY_TYPE_OSD, 2561 int ret = ceph_auth_create_authorizer(ac, CEPH_ENTITY_TYPE_OSD,
2177 auth); 2562 auth);
2563 if (ret)
2564 return ERR_PTR(ret);
2565 } else {
2566 int ret = ceph_auth_update_authorizer(ac, CEPH_ENTITY_TYPE_OSD,
2567 auth);
2178 if (ret) 2568 if (ret)
2179 return ERR_PTR(ret); 2569 return ERR_PTR(ret);
2180 } 2570 }
@@ -2190,11 +2580,7 @@ static int verify_authorizer_reply(struct ceph_connection *con, int len)
2190 struct ceph_osd_client *osdc = o->o_osdc; 2580 struct ceph_osd_client *osdc = o->o_osdc;
2191 struct ceph_auth_client *ac = osdc->client->monc.auth; 2581 struct ceph_auth_client *ac = osdc->client->monc.auth;
2192 2582
2193 /* 2583 return ceph_auth_verify_authorizer_reply(ac, o->o_auth.authorizer, len);
2194 * XXX If ac->ops or ac->ops->verify_authorizer_reply is null,
2195 * XXX which do we do: succeed or fail?
2196 */
2197 return ac->ops->verify_authorizer_reply(ac, o->o_auth.authorizer, len);
2198} 2584}
2199 2585
2200static int invalidate_authorizer(struct ceph_connection *con) 2586static int invalidate_authorizer(struct ceph_connection *con)
@@ -2203,9 +2589,7 @@ static int invalidate_authorizer(struct ceph_connection *con)
2203 struct ceph_osd_client *osdc = o->o_osdc; 2589 struct ceph_osd_client *osdc = o->o_osdc;
2204 struct ceph_auth_client *ac = osdc->client->monc.auth; 2590 struct ceph_auth_client *ac = osdc->client->monc.auth;
2205 2591
2206 if (ac->ops && ac->ops->invalidate_authorizer) 2592 ceph_auth_invalidate_authorizer(ac, CEPH_ENTITY_TYPE_OSD);
2207 ac->ops->invalidate_authorizer(ac, CEPH_ENTITY_TYPE_OSD);
2208
2209 return ceph_monc_validate_auth(&osdc->client->monc); 2593 return ceph_monc_validate_auth(&osdc->client->monc);
2210} 2594}
2211 2595
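
The new ceph_osdc_setup()/ceph_osdc_cleanup() pair above gives libceph a dedicated slab for struct ceph_osd_request. A minimal sketch of how a consumer might drive them at module init/exit; only the two exported calls come from this patch, the surrounding names are hypothetical:

	static int __init my_ceph_consumer_init(void)
	{
		int ret;

		ret = ceph_osdc_setup();	/* create the ceph_osd_request slab */
		if (ret)
			return ret;		/* -ENOMEM if the cache could not be created */
		/* ... register the rest of the client ... */
		return 0;
	}

	static void __exit my_ceph_consumer_exit(void)
	{
		/* ... quiesce outstanding requests first ... */
		ceph_osdc_cleanup();		/* destroy the slab; BUG()s if it is missing */
	}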
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 4543b9aba40c..603ddd92db19 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -654,24 +654,6 @@ static int osdmap_set_max_osd(struct ceph_osdmap *map, int max)
654 return 0; 654 return 0;
655} 655}
656 656
657static int __decode_pgid(void **p, void *end, struct ceph_pg *pg)
658{
659 u8 v;
660
661 ceph_decode_need(p, end, 1+8+4+4, bad);
662 v = ceph_decode_8(p);
663 if (v != 1)
664 goto bad;
665 pg->pool = ceph_decode_64(p);
666 pg->seed = ceph_decode_32(p);
667 *p += 4; /* skip preferred */
668 return 0;
669
670bad:
671 dout("error decoding pgid\n");
672 return -EINVAL;
673}
674
675/* 657/*
676 * decode a full map. 658 * decode a full map.
677 */ 659 */
@@ -765,7 +747,7 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
765 struct ceph_pg pgid; 747 struct ceph_pg pgid;
766 struct ceph_pg_mapping *pg; 748 struct ceph_pg_mapping *pg;
767 749
768 err = __decode_pgid(p, end, &pgid); 750 err = ceph_decode_pgid(p, end, &pgid);
769 if (err) 751 if (err)
770 goto bad; 752 goto bad;
771 ceph_decode_need(p, end, sizeof(u32), bad); 753 ceph_decode_need(p, end, sizeof(u32), bad);
@@ -983,7 +965,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
983 struct ceph_pg pgid; 965 struct ceph_pg pgid;
984 u32 pglen; 966 u32 pglen;
985 967
986 err = __decode_pgid(p, end, &pgid); 968 err = ceph_decode_pgid(p, end, &pgid);
987 if (err) 969 if (err)
988 goto bad; 970 goto bad;
989 ceph_decode_need(p, end, sizeof(u32), bad); 971 ceph_decode_need(p, end, sizeof(u32), bad);
@@ -1111,27 +1093,22 @@ EXPORT_SYMBOL(ceph_calc_file_object_mapping);
1111 * calculate an object layout (i.e. pgid) from an oid, 1093 * calculate an object layout (i.e. pgid) from an oid,
1112 * file_layout, and osdmap 1094 * file_layout, and osdmap
1113 */ 1095 */
1114int ceph_calc_object_layout(struct ceph_pg *pg, 1096int ceph_calc_ceph_pg(struct ceph_pg *pg, const char *oid,
1115 const char *oid, 1097 struct ceph_osdmap *osdmap, uint64_t pool)
1116 struct ceph_file_layout *fl,
1117 struct ceph_osdmap *osdmap)
1118{ 1098{
1119 unsigned int num, num_mask; 1099 struct ceph_pg_pool_info *pool_info;
1120 struct ceph_pg_pool_info *pool;
1121 1100
1122 BUG_ON(!osdmap); 1101 BUG_ON(!osdmap);
1123 pg->pool = le32_to_cpu(fl->fl_pg_pool); 1102 pool_info = __lookup_pg_pool(&osdmap->pg_pools, pool);
1124 pool = __lookup_pg_pool(&osdmap->pg_pools, pg->pool); 1103 if (!pool_info)
1125 if (!pool)
1126 return -EIO; 1104 return -EIO;
1127 pg->seed = ceph_str_hash(pool->object_hash, oid, strlen(oid)); 1105 pg->pool = pool;
1128 num = pool->pg_num; 1106 pg->seed = ceph_str_hash(pool_info->object_hash, oid, strlen(oid));
1129 num_mask = pool->pg_num_mask;
1130 1107
1131 dout("calc_object_layout '%s' pgid %lld.%x\n", oid, pg->pool, pg->seed); 1108 dout("%s '%s' pgid %lld.%x\n", __func__, oid, pg->pool, pg->seed);
1132 return 0; 1109 return 0;
1133} 1110}
1134EXPORT_SYMBOL(ceph_calc_object_layout); 1111EXPORT_SYMBOL(ceph_calc_ceph_pg);
1135 1112
1136/* 1113/*
1137 * Calculate raw osd vector for the given pgid. Return pointer to osd 1114 * Calculate raw osd vector for the given pgid. Return pointer to osd
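
ceph_calc_ceph_pg() now takes the pool id explicitly instead of digging it out of a struct ceph_file_layout. A sketch of the new calling convention, matching the __map_request() hunk earlier in this series:

	struct ceph_pg pgid;
	int err;

	/* the caller extracts the pool, e.g. ceph_file_layout_pg_pool(layout) */
	err = ceph_calc_ceph_pg(&pgid, oid, osdmap, pool_id);
	if (err)
		return err;	/* -EIO when the pool is not in the map */
	/* pgid.pool and pgid.seed now identify the placement group for oid */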
diff --git a/net/ceph/snapshot.c b/net/ceph/snapshot.c
new file mode 100644
index 000000000000..154683f5f14c
--- /dev/null
+++ b/net/ceph/snapshot.c
@@ -0,0 +1,78 @@
1/*
2 * snapshot.c Ceph snapshot context utility routines (part of libceph)
3 *
4 * Copyright (C) 2013 Inktank Storage, Inc.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * version 2 as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 * 02110-1301, USA.
19 */
20
21#include <stddef.h>
22
23#include <linux/types.h>
24#include <linux/export.h>
25#include <linux/ceph/libceph.h>
26
27/*
28 * Ceph snapshot contexts are reference counted objects, and the
29 * returned structure holds a single reference. Acquire additional
30 * references with ceph_get_snap_context(), and release them with
31 * ceph_put_snap_context(). When the reference count reaches zero
32 * the entire structure is freed.
33 */
34
35/*
36 * Create a new ceph snapshot context large enough to hold the
37 * indicated number of snapshot ids (which can be 0). Caller has
38 * to fill in snapc->seq and snapc->snaps[0..snap_count-1].
39 *
40 * Returns a null pointer if an error occurs.
41 */
42struct ceph_snap_context *ceph_create_snap_context(u32 snap_count,
43 gfp_t gfp_flags)
44{
45 struct ceph_snap_context *snapc;
46 size_t size;
47
48 size = sizeof (struct ceph_snap_context);
49 size += snap_count * sizeof (snapc->snaps[0]);
50 snapc = kzalloc(size, gfp_flags);
51 if (!snapc)
52 return NULL;
53
54 atomic_set(&snapc->nref, 1);
55 snapc->num_snaps = snap_count;
56
57 return snapc;
58}
59EXPORT_SYMBOL(ceph_create_snap_context);
60
61struct ceph_snap_context *ceph_get_snap_context(struct ceph_snap_context *sc)
62{
63 if (sc)
64 atomic_inc(&sc->nref);
65 return sc;
66}
67EXPORT_SYMBOL(ceph_get_snap_context);
68
69void ceph_put_snap_context(struct ceph_snap_context *sc)
70{
71 if (!sc)
72 return;
73 if (atomic_dec_and_test(&sc->nref)) {
74 /*printk(" deleting snap_context %p\n", sc);*/
75 kfree(sc);
76 }
77}
78EXPORT_SYMBOL(ceph_put_snap_context);
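
Taken together, the three exports cover the whole lifecycle of a snapshot context. A short usage sketch (the snap ids are placeholders; the caller fills in seq and snaps[], newest id first):

	struct ceph_snap_context *snapc;

	snapc = ceph_create_snap_context(2, GFP_KERNEL);
	if (!snapc)
		return -ENOMEM;
	snapc->seq = 10;		/* caller-supplied sequence */
	snapc->snaps[0] = 10;		/* placeholder snap ids */
	snapc->snaps[1] = 7;
	/* share with ceph_get_snap_context(snapc); each taker later puts */
	ceph_put_snap_context(snapc);	/* frees once the last reference drops */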
diff --git a/net/core/dev.c b/net/core/dev.c
index 4040673f806a..fc1e289397f5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2213,6 +2213,17 @@ __be16 skb_network_protocol(struct sk_buff *skb)
2213 __be16 type = skb->protocol; 2213 __be16 type = skb->protocol;
2214 int vlan_depth = ETH_HLEN; 2214 int vlan_depth = ETH_HLEN;
2215 2215
2216 /* Tunnel gso handlers can set protocol to ethernet. */
2217 if (type == htons(ETH_P_TEB)) {
2218 struct ethhdr *eth;
2219
2220 if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr))))
2221 return 0;
2222
2223 eth = (struct ethhdr *)skb_mac_header(skb);
2224 type = eth->h_proto;
2225 }
2226
2216 while (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) { 2227 while (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) {
2217 struct vlan_hdr *vh; 2228 struct vlan_hdr *vh;
2218 2229
@@ -2456,7 +2467,7 @@ EXPORT_SYMBOL(netif_skb_features);
2456 * 2. skb is fragmented and the device does not support SG. 2467 * 2. skb is fragmented and the device does not support SG.
2457 */ 2468 */
2458static inline int skb_needs_linearize(struct sk_buff *skb, 2469static inline int skb_needs_linearize(struct sk_buff *skb,
2459 int features) 2470 netdev_features_t features)
2460{ 2471{
2461 return skb_is_nonlinear(skb) && 2472 return skb_is_nonlinear(skb) &&
2462 ((skb_has_frag_list(skb) && 2473 ((skb_has_frag_list(skb) &&
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 5a934ef90f8b..22efdaa76ebf 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1421,7 +1421,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
1421 void __user *useraddr = ifr->ifr_data; 1421 void __user *useraddr = ifr->ifr_data;
1422 u32 ethcmd; 1422 u32 ethcmd;
1423 int rc; 1423 int rc;
1424 u32 old_features; 1424 netdev_features_t old_features;
1425 1425
1426 if (!dev || !netif_device_present(dev)) 1426 if (!dev || !netif_device_present(dev))
1427 return -ENODEV; 1427 return -ENODEV;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 7427ab5e27d8..981fed397d1d 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -606,21 +606,11 @@ static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
606 return sprintf(buf, "%lu\n", val); 606 return sprintf(buf, "%lu\n", val);
607} 607}
608 608
609static void rps_dev_flow_table_release_work(struct work_struct *work)
610{
611 struct rps_dev_flow_table *table = container_of(work,
612 struct rps_dev_flow_table, free_work);
613
614 vfree(table);
615}
616
617static void rps_dev_flow_table_release(struct rcu_head *rcu) 609static void rps_dev_flow_table_release(struct rcu_head *rcu)
618{ 610{
619 struct rps_dev_flow_table *table = container_of(rcu, 611 struct rps_dev_flow_table *table = container_of(rcu,
620 struct rps_dev_flow_table, rcu); 612 struct rps_dev_flow_table, rcu);
621 613 vfree(table);
622 INIT_WORK(&table->free_work, rps_dev_flow_table_release_work);
623 schedule_work(&table->free_work);
624} 614}
625 615
626static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, 616static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index a5802a8b652f..cec074be8c43 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -206,7 +206,7 @@ static void netpoll_poll_dev(struct net_device *dev)
206 * the dev_open/close paths use this to block netpoll activity 206 * the dev_open/close paths use this to block netpoll activity
207 * while changing device state 207 * while changing device state
208 */ 208 */
209 if (!down_trylock(&ni->dev_lock)) 209 if (down_trylock(&ni->dev_lock))
210 return; 210 return;
211 211
212 if (!netif_running(dev)) { 212 if (!netif_running(dev)) {
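
The one-character netpoll fix turns on down_trylock()'s return convention, which is the opposite of mutex_trylock(): it returns 0 when the semaphore was acquired and nonzero when it was contended. The corrected shape is:

	if (down_trylock(&ni->dev_lock))
		return;			/* contended: dev_open/close is in progress */
	/* semaphore held: netpoll may proceed */
	/* ... poll ... */
	up(&ni->dev_lock);

The old test returned precisely when the semaphore had been acquired, skipping the poll in the common case and leaking the semaphore besides.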
diff --git a/net/core/sock.c b/net/core/sock.c
index d4f4cea726e7..6ba327da79e1 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1217,18 +1217,6 @@ static void sock_copy(struct sock *nsk, const struct sock *osk)
1217#endif 1217#endif
1218} 1218}
1219 1219
1220/*
1221 * caches using SLAB_DESTROY_BY_RCU should let .next pointer from nulls nodes
1222 * un-modified. Special care is taken when initializing object to zero.
1223 */
1224static inline void sk_prot_clear_nulls(struct sock *sk, int size)
1225{
1226 if (offsetof(struct sock, sk_node.next) != 0)
1227 memset(sk, 0, offsetof(struct sock, sk_node.next));
1228 memset(&sk->sk_node.pprev, 0,
1229 size - offsetof(struct sock, sk_node.pprev));
1230}
1231
1232void sk_prot_clear_portaddr_nulls(struct sock *sk, int size) 1220void sk_prot_clear_portaddr_nulls(struct sock *sk, int size)
1233{ 1221{
1234 unsigned long nulls1, nulls2; 1222 unsigned long nulls1, nulls2;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index c61b3bb87a16..d01be2a3ae53 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1293,6 +1293,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
1293 SKB_GSO_DODGY | 1293 SKB_GSO_DODGY |
1294 SKB_GSO_TCP_ECN | 1294 SKB_GSO_TCP_ECN |
1295 SKB_GSO_GRE | 1295 SKB_GSO_GRE |
1296 SKB_GSO_TCPV6 |
1296 SKB_GSO_UDP_TUNNEL | 1297 SKB_GSO_UDP_TUNNEL |
1297 0))) 1298 0)))
1298 goto out; 1299 goto out;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index ff06b7543d9f..49616fed9340 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -125,7 +125,6 @@ struct tnode {
125 unsigned int empty_children; /* KEYLENGTH bits needed */ 125 unsigned int empty_children; /* KEYLENGTH bits needed */
126 union { 126 union {
127 struct rcu_head rcu; 127 struct rcu_head rcu;
128 struct work_struct work;
129 struct tnode *tnode_free; 128 struct tnode *tnode_free;
130 }; 129 };
131 struct rt_trie_node __rcu *child[0]; 130 struct rt_trie_node __rcu *child[0];
@@ -383,12 +382,6 @@ static struct tnode *tnode_alloc(size_t size)
383 return vzalloc(size); 382 return vzalloc(size);
384} 383}
385 384
386static void __tnode_vfree(struct work_struct *arg)
387{
388 struct tnode *tn = container_of(arg, struct tnode, work);
389 vfree(tn);
390}
391
392static void __tnode_free_rcu(struct rcu_head *head) 385static void __tnode_free_rcu(struct rcu_head *head)
393{ 386{
394 struct tnode *tn = container_of(head, struct tnode, rcu); 387 struct tnode *tn = container_of(head, struct tnode, rcu);
@@ -397,10 +390,8 @@ static void __tnode_free_rcu(struct rcu_head *head)
397 390
398 if (size <= PAGE_SIZE) 391 if (size <= PAGE_SIZE)
399 kfree(tn); 392 kfree(tn);
400 else { 393 else
401 INIT_WORK(&tn->work, __tnode_vfree); 394 vfree(tn);
402 schedule_work(&tn->work);
403 }
404} 395}
405 396
406static inline void tnode_free(struct tnode *tn) 397static inline void tnode_free(struct tnode *tn)
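
This hunk and the net-sysfs.c one above drop the same indirection: bouncing vfree() through a workqueue is no longer needed, since vfree() became safe to call from the softirq context that runs RCU callbacks. The resulting pattern, with hypothetical names:

	static void big_obj_free_rcu(struct rcu_head *head)
	{
		struct big_obj *obj = container_of(head, struct big_obj, rcu);

		vfree(obj);	/* now legal directly in the RCU callback */
	}

	/* ... */
	call_rcu(&obj->rcu, big_obj_free_rcu);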
diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c
index d2d5a99fba09..b2e805af9b87 100644
--- a/net/ipv4/gre.c
+++ b/net/ipv4/gre.c
@@ -121,6 +121,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
121 int ghl = GRE_HEADER_SECTION; 121 int ghl = GRE_HEADER_SECTION;
122 struct gre_base_hdr *greh; 122 struct gre_base_hdr *greh;
123 int mac_len = skb->mac_len; 123 int mac_len = skb->mac_len;
124 __be16 protocol = skb->protocol;
124 int tnl_hlen; 125 int tnl_hlen;
125 bool csum; 126 bool csum;
126 127
@@ -149,13 +150,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
149 csum = false; 150 csum = false;
150 151
151 /* setup inner skb. */ 152 /* setup inner skb. */
152 if (greh->protocol == htons(ETH_P_TEB)) { 153 skb->protocol = greh->protocol;
153 struct ethhdr *eth = eth_hdr(skb);
154 skb->protocol = eth->h_proto;
155 } else {
156 skb->protocol = greh->protocol;
157 }
158
159 skb->encapsulation = 0; 154 skb->encapsulation = 0;
160 155
161 if (unlikely(!pskb_may_pull(skb, ghl))) 156 if (unlikely(!pskb_may_pull(skb, ghl)))
@@ -199,6 +194,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
199 skb_reset_mac_header(skb); 194 skb_reset_mac_header(skb);
200 skb_set_network_header(skb, mac_len); 195 skb_set_network_header(skb, mac_len);
201 skb->mac_len = mac_len; 196 skb->mac_len = mac_len;
197 skb->protocol = protocol;
202 } while ((skb = skb->next)); 198 } while ((skb = skb->next));
203out: 199out:
204 return segs; 200 return segs;
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index e97d66a1fdde..7e06641e36ae 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -305,6 +305,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
305 setup_timer(&q->timer, f->frag_expire, (unsigned long)q); 305 setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
306 spin_lock_init(&q->lock); 306 spin_lock_init(&q->lock);
307 atomic_set(&q->refcnt, 1); 307 atomic_set(&q->refcnt, 1);
308 INIT_LIST_HEAD(&q->lru_list);
308 309
309 return q; 310 return q;
310} 311}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 147abf5275aa..4bcabf3ab4ca 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -84,7 +84,7 @@ int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;
84EXPORT_SYMBOL(sysctl_ip_default_ttl); 84EXPORT_SYMBOL(sysctl_ip_default_ttl);
85 85
86/* Generate a checksum for an outgoing IP datagram. */ 86/* Generate a checksum for an outgoing IP datagram. */
87__inline__ void ip_send_check(struct iphdr *iph) 87void ip_send_check(struct iphdr *iph)
88{ 88{
89 iph->check = 0; 89 iph->check = 0;
90 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); 90 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index f696d7c2e9fa..f6a005c485a9 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -96,7 +96,8 @@ struct tcpm_hash_bucket {
96 96
97static DEFINE_SPINLOCK(tcp_metrics_lock); 97static DEFINE_SPINLOCK(tcp_metrics_lock);
98 98
99static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst) 99static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst,
100 bool fastopen_clear)
100{ 101{
101 u32 val; 102 u32 val;
102 103
@@ -122,9 +123,11 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst)
122 tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING); 123 tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING);
123 tm->tcpm_ts = 0; 124 tm->tcpm_ts = 0;
124 tm->tcpm_ts_stamp = 0; 125 tm->tcpm_ts_stamp = 0;
125 tm->tcpm_fastopen.mss = 0; 126 if (fastopen_clear) {
126 tm->tcpm_fastopen.syn_loss = 0; 127 tm->tcpm_fastopen.mss = 0;
127 tm->tcpm_fastopen.cookie.len = 0; 128 tm->tcpm_fastopen.syn_loss = 0;
129 tm->tcpm_fastopen.cookie.len = 0;
130 }
128} 131}
129 132
130static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, 133static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
@@ -154,7 +157,7 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
154 } 157 }
155 tm->tcpm_addr = *addr; 158 tm->tcpm_addr = *addr;
156 159
157 tcpm_suck_dst(tm, dst); 160 tcpm_suck_dst(tm, dst, true);
158 161
159 if (likely(!reclaim)) { 162 if (likely(!reclaim)) {
160 tm->tcpm_next = net->ipv4.tcp_metrics_hash[hash].chain; 163 tm->tcpm_next = net->ipv4.tcp_metrics_hash[hash].chain;
@@ -171,7 +174,7 @@ out_unlock:
171static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst) 174static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst)
172{ 175{
173 if (tm && unlikely(time_after(jiffies, tm->tcpm_stamp + TCP_METRICS_TIMEOUT))) 176 if (tm && unlikely(time_after(jiffies, tm->tcpm_stamp + TCP_METRICS_TIMEOUT)))
174 tcpm_suck_dst(tm, dst); 177 tcpm_suck_dst(tm, dst, false);
175} 178}
176 179
177#define TCP_METRICS_RECLAIM_DEPTH 5 180#define TCP_METRICS_RECLAIM_DEPTH 5
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 6abbe6455129..0bf5d399a03c 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2311,8 +2311,9 @@ static struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
2311 struct sk_buff *segs = ERR_PTR(-EINVAL); 2311 struct sk_buff *segs = ERR_PTR(-EINVAL);
2312 int mac_len = skb->mac_len; 2312 int mac_len = skb->mac_len;
2313 int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); 2313 int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
2314 int outer_hlen; 2314 __be16 protocol = skb->protocol;
2315 netdev_features_t enc_features; 2315 netdev_features_t enc_features;
2316 int outer_hlen;
2316 2317
2317 if (unlikely(!pskb_may_pull(skb, tnl_hlen))) 2318 if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
2318 goto out; 2319 goto out;
@@ -2322,6 +2323,7 @@ static struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
2322 skb_reset_mac_header(skb); 2323 skb_reset_mac_header(skb);
2323 skb_set_network_header(skb, skb_inner_network_offset(skb)); 2324 skb_set_network_header(skb, skb_inner_network_offset(skb));
2324 skb->mac_len = skb_inner_network_offset(skb); 2325 skb->mac_len = skb_inner_network_offset(skb);
2326 skb->protocol = htons(ETH_P_TEB);
2325 2327
2326 /* segment inner packet. */ 2328 /* segment inner packet. */
2327 enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); 2329 enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
@@ -2358,6 +2360,7 @@ static struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
2358 2360
2359 } 2361 }
2360 skb->ip_summed = CHECKSUM_NONE; 2362 skb->ip_summed = CHECKSUM_NONE;
2363 skb->protocol = protocol;
2361 } while ((skb = skb->next)); 2364 } while ((skb = skb->next));
2362out: 2365out:
2363 return segs; 2366 return segs;
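
Together with the gre.c and dev.c hunks above, this settles one convention for tunnel GSO: save the outer skb->protocol, advertise the inner payload as ETH_P_TEB so skb_network_protocol() knows to peek past the inner Ethernet header, then restore the outer protocol on every resulting segment. In outline (the segmenting helper named here is an assumption about the surrounding function body):

	__be16 protocol = skb->protocol;	/* remember the outer protocol */

	skb->protocol = htons(ETH_P_TEB);	/* inner frame starts with an Ethernet header */
	segs = skb_mac_gso_segment(skb, enc_features);

	do {
		/* ... rebuild outer headers for this segment ... */
		skb->protocol = protocol;	/* restore for the wire */
	} while ((skb = skb->next));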
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index d3ddd8400354..ecd60733e5e2 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1081,6 +1081,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev,
1081 } 1081 }
1082 if (t == NULL) 1082 if (t == NULL)
1083 t = netdev_priv(dev); 1083 t = netdev_priv(dev);
1084 memset(&p, 0, sizeof(p));
1084 ip6gre_tnl_parm_to_user(&p, &t->parms); 1085 ip6gre_tnl_parm_to_user(&p, &t->parms);
1085 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) 1086 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
1086 err = -EFAULT; 1087 err = -EFAULT;
@@ -1128,6 +1129,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev,
1128 if (t) { 1129 if (t) {
1129 err = 0; 1130 err = 0;
1130 1131
1132 memset(&p, 0, sizeof(p));
1131 ip6gre_tnl_parm_to_user(&p, &t->parms); 1133 ip6gre_tnl_parm_to_user(&p, &t->parms);
1132 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) 1134 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
1133 err = -EFAULT; 1135 err = -EFAULT;
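
Both memset()s close the same information leak: fields that ip6gre_tnl_parm_to_user() does not write, plus structure padding, would otherwise carry uninitialized kernel stack bytes out through copy_to_user(). The defensive shape, assuming the user-visible type in this ioctl is struct ip6_tnl_parm2:

	struct ip6_tnl_parm2 p;

	memset(&p, 0, sizeof(p));	/* zero padding and unwritten fields */
	ip6gre_tnl_parm_to_user(&p, &t->parms);
	if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
		err = -EFAULT;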
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 71167069b394..0a17ed9eaf39 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1890,6 +1890,17 @@ void tcp6_proc_exit(struct net *net)
1890} 1890}
1891#endif 1891#endif
1892 1892
1893static void tcp_v6_clear_sk(struct sock *sk, int size)
1894{
1895 struct inet_sock *inet = inet_sk(sk);
1896
1897 /* we do not want to clear pinet6 field, because of RCU lookups */
1898 sk_prot_clear_nulls(sk, offsetof(struct inet_sock, pinet6));
1899
1900 size -= offsetof(struct inet_sock, pinet6) + sizeof(inet->pinet6);
1901 memset(&inet->pinet6 + 1, 0, size);
1902}
1903
1893struct proto tcpv6_prot = { 1904struct proto tcpv6_prot = {
1894 .name = "TCPv6", 1905 .name = "TCPv6",
1895 .owner = THIS_MODULE, 1906 .owner = THIS_MODULE,
@@ -1933,6 +1944,7 @@ struct proto tcpv6_prot = {
1933#ifdef CONFIG_MEMCG_KMEM 1944#ifdef CONFIG_MEMCG_KMEM
1934 .proto_cgroup = tcp_proto_cgroup, 1945 .proto_cgroup = tcp_proto_cgroup,
1935#endif 1946#endif
1947 .clear_sk = tcp_v6_clear_sk,
1936}; 1948};
1937 1949
1938static const struct inet6_protocol tcpv6_protocol = { 1950static const struct inet6_protocol tcpv6_protocol = {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index d4defdd44937..42923b14dfa6 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1432,6 +1432,17 @@ void udp6_proc_exit(struct net *net) {
1432} 1432}
1433#endif /* CONFIG_PROC_FS */ 1433#endif /* CONFIG_PROC_FS */
1434 1434
1435void udp_v6_clear_sk(struct sock *sk, int size)
1436{
1437 struct inet_sock *inet = inet_sk(sk);
1438
1439 /* we do not want to clear pinet6 field, because of RCU lookups */
1440 sk_prot_clear_portaddr_nulls(sk, offsetof(struct inet_sock, pinet6));
1441
1442 size -= offsetof(struct inet_sock, pinet6) + sizeof(inet->pinet6);
1443 memset(&inet->pinet6 + 1, 0, size);
1444}
1445
1435/* ------------------------------------------------------------------------ */ 1446/* ------------------------------------------------------------------------ */
1436 1447
1437struct proto udpv6_prot = { 1448struct proto udpv6_prot = {
@@ -1462,7 +1473,7 @@ struct proto udpv6_prot = {
1462 .compat_setsockopt = compat_udpv6_setsockopt, 1473 .compat_setsockopt = compat_udpv6_setsockopt,
1463 .compat_getsockopt = compat_udpv6_getsockopt, 1474 .compat_getsockopt = compat_udpv6_getsockopt,
1464#endif 1475#endif
1465 .clear_sk = sk_prot_clear_portaddr_nulls, 1476 .clear_sk = udp_v6_clear_sk,
1466}; 1477};
1467 1478
1468static struct inet_protosw udpv6_protosw = { 1479static struct inet_protosw udpv6_protosw = {
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index d7571046bfc4..4691ed50a928 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -31,6 +31,8 @@ extern int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
31extern int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb); 31extern int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb);
32extern void udpv6_destroy_sock(struct sock *sk); 32extern void udpv6_destroy_sock(struct sock *sk);
33 33
34extern void udp_v6_clear_sk(struct sock *sk, int size);
35
34#ifdef CONFIG_PROC_FS 36#ifdef CONFIG_PROC_FS
35extern int udp6_seq_show(struct seq_file *seq, void *v); 37extern int udp6_seq_show(struct seq_file *seq, void *v);
36#endif 38#endif
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 1d08e21d9f69..dfcc4be46898 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -56,7 +56,7 @@ struct proto udplitev6_prot = {
56 .compat_setsockopt = compat_udpv6_setsockopt, 56 .compat_setsockopt = compat_udpv6_setsockopt,
57 .compat_getsockopt = compat_udpv6_getsockopt, 57 .compat_getsockopt = compat_udpv6_getsockopt,
58#endif 58#endif
59 .clear_sk = sk_prot_clear_portaddr_nulls, 59 .clear_sk = udp_v6_clear_sk,
60}; 60};
61 61
62static struct inet_protosw udplite6_protosw = { 62static struct inet_protosw udplite6_protosw = {
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 4ef7bdb65440..23ed03d786c8 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -103,8 +103,10 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
103 dev_hold(dev); 103 dev_hold(dev);
104 104
105 xdst->u.rt6.rt6i_idev = in6_dev_get(dev); 105 xdst->u.rt6.rt6i_idev = in6_dev_get(dev);
106 if (!xdst->u.rt6.rt6i_idev) 106 if (!xdst->u.rt6.rt6i_idev) {
107 dev_put(dev);
107 return -ENODEV; 108 return -ENODEV;
109 }
108 110
109 rt6_transfer_peer(&xdst->u.rt6, rt); 111 rt6_transfer_peer(&xdst->u.rt6, rt);
110 112
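
The added dev_put() balances the dev_hold() taken a few lines earlier; before this, every in6_dev_get() failure leaked a device reference and could pin the netdevice forever. The general rule: an error path must undo any reference the success path would have consumed.

	dev_hold(dev);				/* ref intended for the xdst */
	xdst->u.rt6.rt6i_idev = in6_dev_get(dev);
	if (!xdst->u.rt6.rt6i_idev) {
		dev_put(dev);			/* undo the hold on failure */
		return -ENODEV;
	}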
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index dd5cd49b0e09..8ec1bca7f859 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -742,36 +742,33 @@ static void prb_open_block(struct tpacket_kbdq_core *pkc1,
742 742
743 smp_rmb(); 743 smp_rmb();
744 744
745 if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd1))) { 745 /* We could have just memset this but we will lose the
746 * flexibility of making the priv area sticky
747 */
746 748
747 /* We could have just memset this but we will lose the 749 BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++;
748 * flexibility of making the priv area sticky 750 BLOCK_NUM_PKTS(pbd1) = 0;
749 */ 751 BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
750 BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++;
751 BLOCK_NUM_PKTS(pbd1) = 0;
752 BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
753 getnstimeofday(&ts);
754 h1->ts_first_pkt.ts_sec = ts.tv_sec;
755 h1->ts_first_pkt.ts_nsec = ts.tv_nsec;
756 pkc1->pkblk_start = (char *)pbd1;
757 pkc1->nxt_offset = pkc1->pkblk_start + BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
758 BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
759 BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN;
760 pbd1->version = pkc1->version;
761 pkc1->prev = pkc1->nxt_offset;
762 pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size;
763 prb_thaw_queue(pkc1);
764 _prb_refresh_rx_retire_blk_timer(pkc1);
765 752
766 smp_wmb(); 753 getnstimeofday(&ts);
767 754
768 return; 755 h1->ts_first_pkt.ts_sec = ts.tv_sec;
769 } 756 h1->ts_first_pkt.ts_nsec = ts.tv_nsec;
770 757
771 WARN(1, "ERROR block:%p is NOT FREE status:%d kactive_blk_num:%d\n", 758 pkc1->pkblk_start = (char *)pbd1;
772 pbd1, BLOCK_STATUS(pbd1), pkc1->kactive_blk_num); 759 pkc1->nxt_offset = pkc1->pkblk_start + BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
773 dump_stack(); 760
774 BUG(); 761 BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
762 BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN;
763
764 pbd1->version = pkc1->version;
765 pkc1->prev = pkc1->nxt_offset;
766 pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size;
767
768 prb_thaw_queue(pkc1);
769 _prb_refresh_rx_retire_blk_timer(pkc1);
770
771 smp_wmb();
775} 772}
776 773
777/* 774/*
@@ -862,10 +859,6 @@ static void prb_retire_current_block(struct tpacket_kbdq_core *pkc,
862 prb_close_block(pkc, pbd, po, status); 859 prb_close_block(pkc, pbd, po, status);
863 return; 860 return;
864 } 861 }
865
866 WARN(1, "ERROR-pbd[%d]:%p\n", pkc->kactive_blk_num, pbd);
867 dump_stack();
868 BUG();
869} 862}
870 863
871static int prb_curr_blk_in_use(struct tpacket_kbdq_core *pkc, 864static int prb_curr_blk_in_use(struct tpacket_kbdq_core *pkc,
diff --git a/net/socket.c b/net/socket.c
index b416093997da..6b94633ca61d 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2412,7 +2412,7 @@ static const unsigned char nargs[21] = {
2412 2412
2413SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) 2413SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
2414{ 2414{
2415 unsigned long a[6]; 2415 unsigned long a[AUDITSC_ARGS];
2416 unsigned long a0, a1; 2416 unsigned long a0, a1;
2417 int err; 2417 int err;
2418 unsigned int len; 2418 unsigned int len;
@@ -2428,7 +2428,9 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
2428 if (copy_from_user(a, args, len)) 2428 if (copy_from_user(a, args, len))
2429 return -EFAULT; 2429 return -EFAULT;
2430 2430
2431 audit_socketcall(nargs[call] / sizeof(unsigned long), a); 2431 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2432 if (err)
2433 return err;
2432 2434
2433 a0 = a[0]; 2435 a0 = a[0];
2434 a1 = a[1]; 2436 a1 = a[1];
diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile
index 9e4cb59ef9f0..14e9e53e63d5 100644
--- a/net/sunrpc/auth_gss/Makefile
+++ b/net/sunrpc/auth_gss/Makefile
@@ -5,7 +5,8 @@
5obj-$(CONFIG_SUNRPC_GSS) += auth_rpcgss.o 5obj-$(CONFIG_SUNRPC_GSS) += auth_rpcgss.o
6 6
7auth_rpcgss-y := auth_gss.o gss_generic_token.o \ 7auth_rpcgss-y := auth_gss.o gss_generic_token.o \
8 gss_mech_switch.o svcauth_gss.o 8 gss_mech_switch.o svcauth_gss.o \
9 gss_rpc_upcall.o gss_rpc_xdr.o
9 10
10obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o 11obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o
11 12
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 51415b07174e..7da6b457f66a 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -238,7 +238,7 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct
238 p = ERR_PTR(-EFAULT); 238 p = ERR_PTR(-EFAULT);
239 goto err; 239 goto err;
240 } 240 }
241 ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx, GFP_NOFS); 241 ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx, NULL, GFP_NOFS);
242 if (ret < 0) { 242 if (ret < 0) {
243 p = ERR_PTR(ret); 243 p = ERR_PTR(ret);
244 goto err; 244 goto err;
@@ -867,8 +867,7 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
867 err = -EINVAL; 867 err = -EINVAL;
868 gss_auth->mech = gss_mech_get_by_pseudoflavor(flavor); 868 gss_auth->mech = gss_mech_get_by_pseudoflavor(flavor);
869 if (!gss_auth->mech) { 869 if (!gss_auth->mech) {
870 printk(KERN_WARNING "%s: Pseudoflavor %d not found!\n", 870 dprintk("RPC: Pseudoflavor %d not found!\n", flavor);
871 __func__, flavor);
872 goto err_free; 871 goto err_free;
873 } 872 }
874 gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor); 873 gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor);
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 33255ff889c0..0d3c158ef8fa 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -679,6 +679,7 @@ out_err:
679static int 679static int
680gss_import_sec_context_kerberos(const void *p, size_t len, 680gss_import_sec_context_kerberos(const void *p, size_t len,
681 struct gss_ctx *ctx_id, 681 struct gss_ctx *ctx_id,
682 time_t *endtime,
682 gfp_t gfp_mask) 683 gfp_t gfp_mask)
683{ 684{
684 const void *end = (const void *)((const char *)p + len); 685 const void *end = (const void *)((const char *)p + len);
@@ -694,9 +695,11 @@ gss_import_sec_context_kerberos(const void *p, size_t len,
694 else 695 else
695 ret = gss_import_v2_context(p, end, ctx, gfp_mask); 696 ret = gss_import_v2_context(p, end, ctx, gfp_mask);
696 697
697 if (ret == 0) 698 if (ret == 0) {
698 ctx_id->internal_ctx_id = ctx; 699 ctx_id->internal_ctx_id = ctx;
699 else 700 if (endtime)
701 *endtime = ctx->endtime;
702 } else
700 kfree(ctx); 703 kfree(ctx);
701 704
702 dprintk("RPC: %s: returning %d\n", __func__, ret); 705 dprintk("RPC: %s: returning %d\n", __func__, ret);
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index 79881d6e68a1..defa9d33925c 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -175,7 +175,7 @@ struct gss_api_mech * gss_mech_get_by_name(const char *name)
175 return gm; 175 return gm;
176} 176}
177 177
178static struct gss_api_mech *gss_mech_get_by_OID(struct rpcsec_gss_oid *obj) 178struct gss_api_mech *gss_mech_get_by_OID(struct rpcsec_gss_oid *obj)
179{ 179{
180 struct gss_api_mech *pos, *gm = NULL; 180 struct gss_api_mech *pos, *gm = NULL;
181 char buf[32]; 181 char buf[32];
@@ -386,14 +386,15 @@ int
386gss_import_sec_context(const void *input_token, size_t bufsize, 386gss_import_sec_context(const void *input_token, size_t bufsize,
387 struct gss_api_mech *mech, 387 struct gss_api_mech *mech,
388 struct gss_ctx **ctx_id, 388 struct gss_ctx **ctx_id,
389 time_t *endtime,
389 gfp_t gfp_mask) 390 gfp_t gfp_mask)
390{ 391{
391 if (!(*ctx_id = kzalloc(sizeof(**ctx_id), gfp_mask))) 392 if (!(*ctx_id = kzalloc(sizeof(**ctx_id), gfp_mask)))
392 return -ENOMEM; 393 return -ENOMEM;
393 (*ctx_id)->mech_type = gss_mech_get(mech); 394 (*ctx_id)->mech_type = gss_mech_get(mech);
394 395
395 return mech->gm_ops 396 return mech->gm_ops->gss_import_sec_context(input_token, bufsize,
396 ->gss_import_sec_context(input_token, bufsize, *ctx_id, gfp_mask); 397 *ctx_id, endtime, gfp_mask);
397} 398}
398 399
399/* gss_get_mic: compute a mic over message and return mic_token. */ 400/* gss_get_mic: compute a mic over message and return mic_token. */
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
new file mode 100644
index 000000000000..d304f41260f2
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
@@ -0,0 +1,358 @@
1/*
2 * linux/net/sunrpc/gss_rpc_upcall.c
3 *
4 * Copyright (C) 2012 Simo Sorce <simo@redhat.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 */
20
21#include <linux/types.h>
22#include <linux/un.h>
23
24#include <linux/sunrpc/svcauth.h>
25#include "gss_rpc_upcall.h"
26
27#define GSSPROXY_SOCK_PATHNAME "/var/run/gssproxy.sock"
28
29#define GSSPROXY_PROGRAM (400112u)
30#define GSSPROXY_VERS_1 (1u)
31
32/*
33 * Encoding/Decoding functions
34 */
35
36enum {
37 GSSX_NULL = 0, /* Unused */
38 GSSX_INDICATE_MECHS = 1,
39 GSSX_GET_CALL_CONTEXT = 2,
40 GSSX_IMPORT_AND_CANON_NAME = 3,
41 GSSX_EXPORT_CRED = 4,
42 GSSX_IMPORT_CRED = 5,
43 GSSX_ACQUIRE_CRED = 6,
44 GSSX_STORE_CRED = 7,
45 GSSX_INIT_SEC_CONTEXT = 8,
46 GSSX_ACCEPT_SEC_CONTEXT = 9,
47 GSSX_RELEASE_HANDLE = 10,
48 GSSX_GET_MIC = 11,
49 GSSX_VERIFY = 12,
50 GSSX_WRAP = 13,
51 GSSX_UNWRAP = 14,
52 GSSX_WRAP_SIZE_LIMIT = 15,
53};
54
55#define PROC(proc, name) \
56[GSSX_##proc] = { \
57 .p_proc = GSSX_##proc, \
58 .p_encode = (kxdreproc_t)gssx_enc_##name, \
59 .p_decode = (kxdrdproc_t)gssx_dec_##name, \
60 .p_arglen = GSSX_ARG_##name##_sz, \
61 .p_replen = GSSX_RES_##name##_sz, \
62 .p_statidx = GSSX_##proc, \
63 .p_name = #proc, \
64}
65
66static struct rpc_procinfo gssp_procedures[] = {
67 PROC(INDICATE_MECHS, indicate_mechs),
68 PROC(GET_CALL_CONTEXT, get_call_context),
69 PROC(IMPORT_AND_CANON_NAME, import_and_canon_name),
70 PROC(EXPORT_CRED, export_cred),
71 PROC(IMPORT_CRED, import_cred),
72 PROC(ACQUIRE_CRED, acquire_cred),
73 PROC(STORE_CRED, store_cred),
74 PROC(INIT_SEC_CONTEXT, init_sec_context),
75 PROC(ACCEPT_SEC_CONTEXT, accept_sec_context),
76 PROC(RELEASE_HANDLE, release_handle),
77 PROC(GET_MIC, get_mic),
78 PROC(VERIFY, verify),
79 PROC(WRAP, wrap),
80 PROC(UNWRAP, unwrap),
81 PROC(WRAP_SIZE_LIMIT, wrap_size_limit),
82};
83
84
85
86/*
87 * Common transport functions
88 */
89
90static const struct rpc_program gssp_program;
91
92static int gssp_rpc_create(struct net *net, struct rpc_clnt **_clnt)
93{
94 static const struct sockaddr_un gssp_localaddr = {
95 .sun_family = AF_LOCAL,
96 .sun_path = GSSPROXY_SOCK_PATHNAME,
97 };
98 struct rpc_create_args args = {
99 .net = net,
100 .protocol = XPRT_TRANSPORT_LOCAL,
101 .address = (struct sockaddr *)&gssp_localaddr,
102 .addrsize = sizeof(gssp_localaddr),
103 .servername = "localhost",
104 .program = &gssp_program,
105 .version = GSSPROXY_VERS_1,
106 .authflavor = RPC_AUTH_NULL,
107 /*
108 * Note we want connection to be done in the caller's
109 * filesystem namespace. We therefore turn off the idle
110 * timeout, which would result in reconnections being
111 * done without the correct namespace:
112 */
113 .flags = RPC_CLNT_CREATE_NOPING |
114 RPC_CLNT_CREATE_NO_IDLE_TIMEOUT
115 };
116 struct rpc_clnt *clnt;
117 int result = 0;
118
119 clnt = rpc_create(&args);
120 if (IS_ERR(clnt)) {
121 dprintk("RPC: failed to create AF_LOCAL gssproxy "
122 "client (errno %ld).\n", PTR_ERR(clnt));
123 result = -PTR_ERR(clnt);
124 *_clnt = NULL;
125 goto out;
126 }
127
128 dprintk("RPC: created new gssp local client (gssp_local_clnt: "
129 "%p)\n", clnt);
130 *_clnt = clnt;
131
132out:
133 return result;
134}
135
136void init_gssp_clnt(struct sunrpc_net *sn)
137{
138 mutex_init(&sn->gssp_lock);
139 sn->gssp_clnt = NULL;
140 init_waitqueue_head(&sn->gssp_wq);
141}
142
143int set_gssp_clnt(struct net *net)
144{
145 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
146 struct rpc_clnt *clnt;
147 int ret;
148
149 mutex_lock(&sn->gssp_lock);
150 ret = gssp_rpc_create(net, &clnt);
151 if (!ret) {
152 if (sn->gssp_clnt)
153 rpc_shutdown_client(sn->gssp_clnt);
154 sn->gssp_clnt = clnt;
155 }
156 mutex_unlock(&sn->gssp_lock);
157 wake_up(&sn->gssp_wq);
158 return ret;
159}
160
161void clear_gssp_clnt(struct sunrpc_net *sn)
162{
163 mutex_lock(&sn->gssp_lock);
164 if (sn->gssp_clnt) {
165 rpc_shutdown_client(sn->gssp_clnt);
166 sn->gssp_clnt = NULL;
167 }
168 mutex_unlock(&sn->gssp_lock);
169}
170
171static struct rpc_clnt *get_gssp_clnt(struct sunrpc_net *sn)
172{
173 struct rpc_clnt *clnt;
174
175 mutex_lock(&sn->gssp_lock);
176 clnt = sn->gssp_clnt;
177 if (clnt)
178 atomic_inc(&clnt->cl_count);
179 mutex_unlock(&sn->gssp_lock);
180 return clnt;
181}
182
183static int gssp_call(struct net *net, struct rpc_message *msg)
184{
185 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
186 struct rpc_clnt *clnt;
187 int status;
188
189 clnt = get_gssp_clnt(sn);
190 if (!clnt)
191 return -EIO;
192 status = rpc_call_sync(clnt, msg, 0);
193 if (status < 0) {
194 dprintk("gssp: rpc_call returned error %d\n", -status);
195 switch (status) {
196 case -EPROTONOSUPPORT:
197 status = -EINVAL;
198 break;
199 case -ECONNREFUSED:
200 case -ETIMEDOUT:
201 case -ENOTCONN:
202 status = -EAGAIN;
203 break;
204 case -ERESTARTSYS:
205			if (signalled())
206 status = -EINTR;
207 break;
208 default:
209 break;
210 }
211 }
212 rpc_release_client(clnt);
213 return status;
214}
215
216
217/*
218 * Public functions
219 */
220
221/* numbers somewhat arbitrary but large enough for current needs */
222#define GSSX_MAX_OUT_HANDLE 128
223#define GSSX_MAX_SRC_PRINC 256
224#define GSSX_KMEMBUF (GSSX_max_output_handle_sz + \
225 GSSX_max_oid_sz + \
226 GSSX_max_princ_sz + \
227 sizeof(struct svc_cred))
228
229int gssp_accept_sec_context_upcall(struct net *net,
230 struct gssp_upcall_data *data)
231{
232 struct gssx_ctx ctxh = {
233 .state = data->in_handle
234 };
235 struct gssx_arg_accept_sec_context arg = {
236 .input_token = data->in_token,
237 };
238 struct gssx_ctx rctxh = {
239 /*
240 * pass in the max length we expect for each of these
241 * buffers but let the xdr code kmalloc them:
242 */
243 .exported_context_token.len = GSSX_max_output_handle_sz,
244 .mech.len = GSS_OID_MAX_LEN,
245 .src_name.display_name.len = GSSX_max_princ_sz
246 };
247 struct gssx_res_accept_sec_context res = {
248 .context_handle = &rctxh,
249 .output_token = &data->out_token
250 };
251 struct rpc_message msg = {
252 .rpc_proc = &gssp_procedures[GSSX_ACCEPT_SEC_CONTEXT],
253 .rpc_argp = &arg,
254 .rpc_resp = &res,
255 .rpc_cred = NULL, /* FIXME ? */
256 };
257	struct xdr_netobj client_name = { 0, NULL };
258 int ret;
259
260 if (data->in_handle.len != 0)
261 arg.context_handle = &ctxh;
262 res.output_token->len = GSSX_max_output_token_sz;
263
264 /* use nfs/ for targ_name ? */
265
266 ret = gssp_call(net, &msg);
267
268 /* we need to fetch all data even in case of error so
269	 * that we can free special structures if they have been allocated */
270 data->major_status = res.status.major_status;
271 data->minor_status = res.status.minor_status;
272 if (res.context_handle) {
273 data->out_handle = rctxh.exported_context_token;
274 data->mech_oid.len = rctxh.mech.len;
275 memcpy(data->mech_oid.data, rctxh.mech.data,
276 data->mech_oid.len);
277 client_name = rctxh.src_name.display_name;
278 }
279
280 if (res.options.count == 1) {
281 gssx_buffer *value = &res.options.data[0].value;
282 /* Currently we only decode CREDS_VALUE, if we add
283 * anything else we'll have to loop and match on the
284 * option name */
285 if (value->len == 1) {
286 /* steal group info from struct svc_cred */
287 data->creds = *(struct svc_cred *)value->data;
288 data->found_creds = 1;
289 }
290 /* whether we use it or not, free data */
291 kfree(value->data);
292 }
293
294 if (res.options.count != 0) {
295 kfree(res.options.data);
296 }
297
298 /* convert to GSS_NT_HOSTBASED_SERVICE form and set into creds */
299 if (data->found_creds && client_name.data != NULL) {
300 char *c;
301
302 data->creds.cr_principal = kstrndup(client_name.data,
303 client_name.len, GFP_KERNEL);
304 if (data->creds.cr_principal) {
305 /* terminate and remove realm part */
306 c = strchr(data->creds.cr_principal, '@');
307 if (c) {
308 *c = '\0';
309
310 /* change service-hostname delimiter */
311 c = strchr(data->creds.cr_principal, '/');
312 if (c) *c = '@';
313 }
314 if (!c) {
315 /* not a service principal */
316 kfree(data->creds.cr_principal);
317 data->creds.cr_principal = NULL;
318 }
319 }
320 }
321 kfree(client_name.data);
322
323 return ret;
324}
325
326void gssp_free_upcall_data(struct gssp_upcall_data *data)
327{
328 kfree(data->in_handle.data);
329 kfree(data->out_handle.data);
330 kfree(data->out_token.data);
331 kfree(data->mech_oid.data);
332 free_svc_cred(&data->creds);
333}
334
335/*
336 * Initialization stuff
337 */
338
339static const struct rpc_version gssp_version1 = {
340 .number = GSSPROXY_VERS_1,
341 .nrprocs = ARRAY_SIZE(gssp_procedures),
342 .procs = gssp_procedures,
343};
344
345static const struct rpc_version *gssp_version[] = {
346 NULL,
347 &gssp_version1,
348};
349
350static struct rpc_stat gssp_stats;
351
352static const struct rpc_program gssp_program = {
353 .name = "gssproxy",
354 .number = GSSPROXY_PROGRAM,
355 .nrvers = ARRAY_SIZE(gssp_version),
356 .version = gssp_version,
357 .stats = &gssp_stats,
358};
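
The principal rewrite at the end of gssp_accept_sec_context_upcall() above is compact but subtle, so here is a minimal userspace sketch of the same transform under the same rules: "nfs/server.example.com@REALM" becomes the GSS_NT_HOSTBASED_SERVICE form "nfs@server.example.com", and a name missing either delimiter is rejected as "not a service principal". to_hostbased_service() and the main() harness are illustrative only, not kernel code.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static char *to_hostbased_service(const char *princ)
{
	char *copy = strdup(princ);
	char *c;

	if (!copy)
		return NULL;
	/* terminate and remove the realm part */
	c = strchr(copy, '@');
	if (c) {
		*c = '\0';
		/* change the service-hostname delimiter */
		c = strchr(copy, '/');
		if (c)
			*c = '@';
	}
	if (!c) {
		/* no '@' at all, or no '/' component: not a service principal */
		free(copy);
		return NULL;
	}
	return copy;
}

int main(void)
{
	char *s = to_hostbased_service("nfs/server.example.com@EXAMPLE.COM");

	printf("%s\n", s ? s : "(rejected)");	/* nfs@server.example.com */
	free(s);
	return 0;
}
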
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.h b/net/sunrpc/auth_gss/gss_rpc_upcall.h
new file mode 100644
index 000000000000..1e542aded90a
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.h
@@ -0,0 +1,48 @@
1/*
2 * linux/net/sunrpc/gss_rpc_upcall.h
3 *
4 * Copyright (C) 2012 Simo Sorce <simo@redhat.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 */
20
21#ifndef _GSS_RPC_UPCALL_H
22#define _GSS_RPC_UPCALL_H
23
24#include <linux/sunrpc/gss_api.h>
25#include <linux/sunrpc/auth_gss.h>
26#include "gss_rpc_xdr.h"
27#include "../netns.h"
28
29struct gssp_upcall_data {
30 struct xdr_netobj in_handle;
31 struct gssp_in_token in_token;
32 struct xdr_netobj out_handle;
33 struct xdr_netobj out_token;
34 struct rpcsec_gss_oid mech_oid;
35 struct svc_cred creds;
36 int found_creds;
37 int major_status;
38 int minor_status;
39};
40
41int gssp_accept_sec_context_upcall(struct net *net,
42 struct gssp_upcall_data *data);
43void gssp_free_upcall_data(struct gssp_upcall_data *data);
44
45void init_gssp_clnt(struct sunrpc_net *);
46int set_gssp_clnt(struct net *);
47void clear_gssp_clnt(struct sunrpc_net *);
48#endif /* _GSS_RPC_UPCALL_H */
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c
new file mode 100644
index 000000000000..357f613df7ff
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c
@@ -0,0 +1,840 @@
1/*
2 * GSS Proxy upcall module
3 *
4 * Copyright (C) 2012 Simo Sorce <simo@redhat.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 */
20
21#include <linux/sunrpc/svcauth.h>
22#include "gss_rpc_xdr.h"
23
24static int gssx_enc_bool(struct xdr_stream *xdr, int v)
25{
26 __be32 *p;
27
28 p = xdr_reserve_space(xdr, 4);
29 if (unlikely(p == NULL))
30 return -ENOSPC;
31 *p = v ? xdr_one : xdr_zero;
32 return 0;
33}
34
35static int gssx_dec_bool(struct xdr_stream *xdr, u32 *v)
36{
37 __be32 *p;
38
39 p = xdr_inline_decode(xdr, 4);
40 if (unlikely(p == NULL))
41 return -ENOSPC;
42 *v = be32_to_cpu(*p);
43 return 0;
44}
45
46static int gssx_enc_buffer(struct xdr_stream *xdr,
47 gssx_buffer *buf)
48{
49 __be32 *p;
50
51 p = xdr_reserve_space(xdr, sizeof(u32) + buf->len);
52 if (!p)
53 return -ENOSPC;
54 xdr_encode_opaque(p, buf->data, buf->len);
55 return 0;
56}
57
58static int gssx_enc_in_token(struct xdr_stream *xdr,
59 struct gssp_in_token *in)
60{
61 __be32 *p;
62
63 p = xdr_reserve_space(xdr, 4);
64 if (!p)
65 return -ENOSPC;
66 *p = cpu_to_be32(in->page_len);
67
68 /* all we need to do is to write pages */
69 xdr_write_pages(xdr, in->pages, in->page_base, in->page_len);
70
71 return 0;
72}
73
74
75static int gssx_dec_buffer(struct xdr_stream *xdr,
76 gssx_buffer *buf)
77{
78 u32 length;
79 __be32 *p;
80
81 p = xdr_inline_decode(xdr, 4);
82 if (unlikely(p == NULL))
83 return -ENOSPC;
84
85 length = be32_to_cpup(p);
86 p = xdr_inline_decode(xdr, length);
87 if (unlikely(p == NULL))
88 return -ENOSPC;
89
90 if (buf->len == 0) {
91 /* we intentionally are not interested in this buffer */
92 return 0;
93 }
94 if (length > buf->len)
95 return -ENOSPC;
96
97 if (!buf->data) {
98 buf->data = kmemdup(p, length, GFP_KERNEL);
99 if (!buf->data)
100 return -ENOMEM;
101 } else {
102 memcpy(buf->data, p, length);
103 }
104 buf->len = length;
105 return 0;
106}
107
108static int gssx_enc_option(struct xdr_stream *xdr,
109 struct gssx_option *opt)
110{
111 int err;
112
113 err = gssx_enc_buffer(xdr, &opt->option);
114 if (err)
115 return err;
116 err = gssx_enc_buffer(xdr, &opt->value);
117 return err;
118}
119
120static int gssx_dec_option(struct xdr_stream *xdr,
121 struct gssx_option *opt)
122{
123 int err;
124
125 err = gssx_dec_buffer(xdr, &opt->option);
126 if (err)
127 return err;
128 err = gssx_dec_buffer(xdr, &opt->value);
129 return err;
130}
131
132static int dummy_enc_opt_array(struct xdr_stream *xdr,
133 struct gssx_option_array *oa)
134{
135 __be32 *p;
136
137 if (oa->count != 0)
138 return -EINVAL;
139
140 p = xdr_reserve_space(xdr, 4);
141 if (!p)
142 return -ENOSPC;
143 *p = 0;
144
145 return 0;
146}
147
148static int dummy_dec_opt_array(struct xdr_stream *xdr,
149 struct gssx_option_array *oa)
150{
151 struct gssx_option dummy;
152 u32 count, i;
153 __be32 *p;
154
155 p = xdr_inline_decode(xdr, 4);
156 if (unlikely(p == NULL))
157 return -ENOSPC;
158 count = be32_to_cpup(p++);
159 memset(&dummy, 0, sizeof(dummy));
160 for (i = 0; i < count; i++) {
161 gssx_dec_option(xdr, &dummy);
162 }
163
164 oa->count = 0;
165 oa->data = NULL;
166 return 0;
167}
168
169static int get_s32(void **p, void *max, s32 *res)
170{
171 void *base = *p;
172 void *next = (void *)((char *)base + sizeof(s32));
173 if (unlikely(next > max || next < base))
174 return -EINVAL;
175 memcpy(res, base, sizeof(s32));
176 *p = next;
177 return 0;
178}
179
180static int gssx_dec_linux_creds(struct xdr_stream *xdr,
181 struct svc_cred *creds)
182{
183 u32 length;
184 __be32 *p;
185 void *q, *end;
186 s32 tmp;
187 int N, i, err;
188
189 p = xdr_inline_decode(xdr, 4);
190 if (unlikely(p == NULL))
191 return -ENOSPC;
192
193 length = be32_to_cpup(p);
194
195 /* FIXME: we do not want to use the scratch buffer for this one
196	 * may need to use functions that allow us to access an io vector
197 * directly */
198 p = xdr_inline_decode(xdr, length);
199 if (unlikely(p == NULL))
200 return -ENOSPC;
201
202 q = p;
203 end = q + length;
204
205 /* uid */
206 err = get_s32(&q, end, &tmp);
207 if (err)
208 return err;
209 creds->cr_uid = make_kuid(&init_user_ns, tmp);
210
211 /* gid */
212 err = get_s32(&q, end, &tmp);
213 if (err)
214 return err;
215 creds->cr_gid = make_kgid(&init_user_ns, tmp);
216
217 /* number of additional gid's */
218 err = get_s32(&q, end, &tmp);
219 if (err)
220 return err;
221 N = tmp;
222 creds->cr_group_info = groups_alloc(N);
223 if (creds->cr_group_info == NULL)
224 return -ENOMEM;
225
226 /* gid's */
227 for (i = 0; i < N; i++) {
228 kgid_t kgid;
229 err = get_s32(&q, end, &tmp);
230 if (err)
231 goto out_free_groups;
232 err = -EINVAL;
233 kgid = make_kgid(&init_user_ns, tmp);
234 if (!gid_valid(kgid))
235 goto out_free_groups;
236 GROUP_AT(creds->cr_group_info, i) = kgid;
237 }
238
239 return 0;
240out_free_groups:
241 groups_free(creds->cr_group_info);
242 return err;
243}
244
245static int gssx_dec_option_array(struct xdr_stream *xdr,
246 struct gssx_option_array *oa)
247{
248 struct svc_cred *creds;
249 u32 count, i;
250 __be32 *p;
251 int err;
252
253 p = xdr_inline_decode(xdr, 4);
254 if (unlikely(p == NULL))
255 return -ENOSPC;
256 count = be32_to_cpup(p++);
257 if (!count)
258 return 0;
259
260 /* we recognize only 1 currently: CREDS_VALUE */
261 oa->count = 1;
262
263 oa->data = kmalloc(sizeof(struct gssx_option), GFP_KERNEL);
264 if (!oa->data)
265 return -ENOMEM;
266
267 creds = kmalloc(sizeof(struct svc_cred), GFP_KERNEL);
268 if (!creds) {
269 kfree(oa->data);
270 return -ENOMEM;
271 }
272
273 oa->data[0].option.data = CREDS_VALUE;
274 oa->data[0].option.len = sizeof(CREDS_VALUE);
275 oa->data[0].value.data = (void *)creds;
276 oa->data[0].value.len = 0;
277
278 for (i = 0; i < count; i++) {
279 gssx_buffer dummy = { 0, NULL };
280 u32 length;
281
282 /* option buffer */
283 p = xdr_inline_decode(xdr, 4);
284 if (unlikely(p == NULL))
285 return -ENOSPC;
286
287 length = be32_to_cpup(p);
288 p = xdr_inline_decode(xdr, length);
289 if (unlikely(p == NULL))
290 return -ENOSPC;
291
292 if (length == sizeof(CREDS_VALUE) &&
293 memcmp(p, CREDS_VALUE, sizeof(CREDS_VALUE)) == 0) {
294			/* We have creds here. Parse them */
295 err = gssx_dec_linux_creds(xdr, creds);
296 if (err)
297 return err;
298 oa->data[0].value.len = 1; /* presence */
299 } else {
300 /* consume uninteresting buffer */
301 err = gssx_dec_buffer(xdr, &dummy);
302 if (err)
303 return err;
304 }
305 }
306 return 0;
307}
308
309static int gssx_dec_status(struct xdr_stream *xdr,
310 struct gssx_status *status)
311{
312 __be32 *p;
313 int err;
314
315 /* status->major_status */
316 p = xdr_inline_decode(xdr, 8);
317 if (unlikely(p == NULL))
318 return -ENOSPC;
319 p = xdr_decode_hyper(p, &status->major_status);
320
321 /* status->mech */
322 err = gssx_dec_buffer(xdr, &status->mech);
323 if (err)
324 return err;
325
326 /* status->minor_status */
327 p = xdr_inline_decode(xdr, 8);
328 if (unlikely(p == NULL))
329 return -ENOSPC;
330 p = xdr_decode_hyper(p, &status->minor_status);
331
332 /* status->major_status_string */
333 err = gssx_dec_buffer(xdr, &status->major_status_string);
334 if (err)
335 return err;
336
337 /* status->minor_status_string */
338 err = gssx_dec_buffer(xdr, &status->minor_status_string);
339 if (err)
340 return err;
341
342 /* status->server_ctx */
343 err = gssx_dec_buffer(xdr, &status->server_ctx);
344 if (err)
345 return err;
346
347 /* we assume we have no options for now, so simply consume them */
348 /* status->options */
349 err = dummy_dec_opt_array(xdr, &status->options);
350
351 return err;
352}
353
354static int gssx_enc_call_ctx(struct xdr_stream *xdr,
355 struct gssx_call_ctx *ctx)
356{
357 struct gssx_option opt;
358 __be32 *p;
359 int err;
360
361 /* ctx->locale */
362 err = gssx_enc_buffer(xdr, &ctx->locale);
363 if (err)
364 return err;
365
366 /* ctx->server_ctx */
367 err = gssx_enc_buffer(xdr, &ctx->server_ctx);
368 if (err)
369 return err;
370
371 /* we always want to ask for lucid contexts */
372 /* ctx->options */
373 p = xdr_reserve_space(xdr, 4);
374 *p = cpu_to_be32(2);
375
376 /* we want a lucid_v1 context */
377 opt.option.data = LUCID_OPTION;
378 opt.option.len = sizeof(LUCID_OPTION);
379 opt.value.data = LUCID_VALUE;
380 opt.value.len = sizeof(LUCID_VALUE);
381 err = gssx_enc_option(xdr, &opt);
382
383 /* ..and user creds */
384 opt.option.data = CREDS_OPTION;
385 opt.option.len = sizeof(CREDS_OPTION);
386 opt.value.data = CREDS_VALUE;
387 opt.value.len = sizeof(CREDS_VALUE);
388 err = gssx_enc_option(xdr, &opt);
389
390 return err;
391}
392
393static int gssx_dec_name_attr(struct xdr_stream *xdr,
394 struct gssx_name_attr *attr)
395{
396 int err;
397
398 /* attr->attr */
399 err = gssx_dec_buffer(xdr, &attr->attr);
400 if (err)
401 return err;
402
403 /* attr->value */
404 err = gssx_dec_buffer(xdr, &attr->value);
405 if (err)
406 return err;
407
408 /* attr->extensions */
409 err = dummy_dec_opt_array(xdr, &attr->extensions);
410
411 return err;
412}
413
414static int dummy_enc_nameattr_array(struct xdr_stream *xdr,
415 struct gssx_name_attr_array *naa)
416{
417 __be32 *p;
418
419 if (naa->count != 0)
420 return -EINVAL;
421
422 p = xdr_reserve_space(xdr, 4);
423 if (!p)
424 return -ENOSPC;
425 *p = 0;
426
427 return 0;
428}
429
430static int dummy_dec_nameattr_array(struct xdr_stream *xdr,
431 struct gssx_name_attr_array *naa)
432{
433 struct gssx_name_attr dummy;
434 u32 count, i;
435 __be32 *p;
436
437 p = xdr_inline_decode(xdr, 4);
438 if (unlikely(p == NULL))
439 return -ENOSPC;
440 count = be32_to_cpup(p++);
441 for (i = 0; i < count; i++) {
442 gssx_dec_name_attr(xdr, &dummy);
443 }
444
445 naa->count = 0;
446 naa->data = NULL;
447 return 0;
448}
449
450static struct xdr_netobj zero_netobj = {};
451
452static struct gssx_name_attr_array zero_name_attr_array = {};
453
454static struct gssx_option_array zero_option_array = {};
455
456static int gssx_enc_name(struct xdr_stream *xdr,
457 struct gssx_name *name)
458{
459 int err;
460
461 /* name->display_name */
462 err = gssx_enc_buffer(xdr, &name->display_name);
463 if (err)
464 return err;
465
466 /* name->name_type */
467 err = gssx_enc_buffer(xdr, &zero_netobj);
468 if (err)
469 return err;
470
471 /* name->exported_name */
472 err = gssx_enc_buffer(xdr, &zero_netobj);
473 if (err)
474 return err;
475
476 /* name->exported_composite_name */
477 err = gssx_enc_buffer(xdr, &zero_netobj);
478 if (err)
479 return err;
480
481 /* leave name_attributes empty for now, will add once we have any
482 * to pass up at all */
483 /* name->name_attributes */
484 err = dummy_enc_nameattr_array(xdr, &zero_name_attr_array);
485 if (err)
486 return err;
487
488 /* leave options empty for now, will add once we have any options
489 * to pass up at all */
490 /* name->extensions */
491 err = dummy_enc_opt_array(xdr, &zero_option_array);
492
493 return err;
494}
495
496static int gssx_dec_name(struct xdr_stream *xdr,
497 struct gssx_name *name)
498{
499 struct xdr_netobj dummy_netobj;
500 struct gssx_name_attr_array dummy_name_attr_array;
501 struct gssx_option_array dummy_option_array;
502 int err;
503
504 /* name->display_name */
505 err = gssx_dec_buffer(xdr, &name->display_name);
506 if (err)
507 return err;
508
509 /* name->name_type */
510 err = gssx_dec_buffer(xdr, &dummy_netobj);
511 if (err)
512 return err;
513
514 /* name->exported_name */
515 err = gssx_dec_buffer(xdr, &dummy_netobj);
516 if (err)
517 return err;
518
519 /* name->exported_composite_name */
520 err = gssx_dec_buffer(xdr, &dummy_netobj);
521 if (err)
522 return err;
523
524 /* we assume we have no attributes for now, so simply consume them */
525 /* name->name_attributes */
526 err = dummy_dec_nameattr_array(xdr, &dummy_name_attr_array);
527 if (err)
528 return err;
529
530 /* we assume we have no options for now, so simply consume them */
531 /* name->extensions */
532 err = dummy_dec_opt_array(xdr, &dummy_option_array);
533
534 return err;
535}
536
537static int dummy_enc_credel_array(struct xdr_stream *xdr,
538 struct gssx_cred_element_array *cea)
539{
540 __be32 *p;
541
542 if (cea->count != 0)
543 return -EINVAL;
544
545 p = xdr_reserve_space(xdr, 4);
546 if (!p)
547 return -ENOSPC;
548 *p = 0;
549
550 return 0;
551}
552
553static int gssx_enc_cred(struct xdr_stream *xdr,
554 struct gssx_cred *cred)
555{
556 int err;
557
558 /* cred->desired_name */
559 err = gssx_enc_name(xdr, &cred->desired_name);
560 if (err)
561 return err;
562
563 /* cred->elements */
564 err = dummy_enc_credel_array(xdr, &cred->elements);
565
566 /* cred->cred_handle_reference */
567 err = gssx_enc_buffer(xdr, &cred->cred_handle_reference);
568 if (err)
569 return err;
570
571 /* cred->needs_release */
572 err = gssx_enc_bool(xdr, cred->needs_release);
573
574 return err;
575}
576
577static int gssx_enc_ctx(struct xdr_stream *xdr,
578 struct gssx_ctx *ctx)
579{
580 __be32 *p;
581 int err;
582
583 /* ctx->exported_context_token */
584 err = gssx_enc_buffer(xdr, &ctx->exported_context_token);
585 if (err)
586 return err;
587
588 /* ctx->state */
589 err = gssx_enc_buffer(xdr, &ctx->state);
590 if (err)
591 return err;
592
593 /* ctx->need_release */
594 err = gssx_enc_bool(xdr, ctx->need_release);
595 if (err)
596 return err;
597
598 /* ctx->mech */
599 err = gssx_enc_buffer(xdr, &ctx->mech);
600 if (err)
601 return err;
602
603 /* ctx->src_name */
604 err = gssx_enc_name(xdr, &ctx->src_name);
605 if (err)
606 return err;
607
608 /* ctx->targ_name */
609 err = gssx_enc_name(xdr, &ctx->targ_name);
610 if (err)
611 return err;
612
613 /* ctx->lifetime */
614 p = xdr_reserve_space(xdr, 8+8);
615 if (!p)
616 return -ENOSPC;
617 p = xdr_encode_hyper(p, ctx->lifetime);
618
619 /* ctx->ctx_flags */
620 p = xdr_encode_hyper(p, ctx->ctx_flags);
621
622 /* ctx->locally_initiated */
623 err = gssx_enc_bool(xdr, ctx->locally_initiated);
624 if (err)
625 return err;
626
627 /* ctx->open */
628 err = gssx_enc_bool(xdr, ctx->open);
629 if (err)
630 return err;
631
632 /* leave options empty for now, will add once we have any options
633 * to pass up at all */
634 /* ctx->options */
635 err = dummy_enc_opt_array(xdr, &ctx->options);
636
637 return err;
638}
639
640static int gssx_dec_ctx(struct xdr_stream *xdr,
641 struct gssx_ctx *ctx)
642{
643 __be32 *p;
644 int err;
645
646 /* ctx->exported_context_token */
647 err = gssx_dec_buffer(xdr, &ctx->exported_context_token);
648 if (err)
649 return err;
650
651 /* ctx->state */
652 err = gssx_dec_buffer(xdr, &ctx->state);
653 if (err)
654 return err;
655
656 /* ctx->need_release */
657 err = gssx_dec_bool(xdr, &ctx->need_release);
658 if (err)
659 return err;
660
661 /* ctx->mech */
662 err = gssx_dec_buffer(xdr, &ctx->mech);
663 if (err)
664 return err;
665
666 /* ctx->src_name */
667 err = gssx_dec_name(xdr, &ctx->src_name);
668 if (err)
669 return err;
670
671 /* ctx->targ_name */
672 err = gssx_dec_name(xdr, &ctx->targ_name);
673 if (err)
674 return err;
675
676 /* ctx->lifetime */
677 p = xdr_inline_decode(xdr, 8+8);
678 if (unlikely(p == NULL))
679 return -ENOSPC;
680 p = xdr_decode_hyper(p, &ctx->lifetime);
681
682 /* ctx->ctx_flags */
683 p = xdr_decode_hyper(p, &ctx->ctx_flags);
684
685 /* ctx->locally_initiated */
686 err = gssx_dec_bool(xdr, &ctx->locally_initiated);
687 if (err)
688 return err;
689
690 /* ctx->open */
691 err = gssx_dec_bool(xdr, &ctx->open);
692 if (err)
693 return err;
694
695 /* we assume we have no options for now, so simply consume them */
696 /* ctx->options */
697 err = dummy_dec_opt_array(xdr, &ctx->options);
698
699 return err;
700}
701
702static int gssx_enc_cb(struct xdr_stream *xdr, struct gssx_cb *cb)
703{
704 __be32 *p;
705 int err;
706
707 /* cb->initiator_addrtype */
708 p = xdr_reserve_space(xdr, 8);
709 if (!p)
710 return -ENOSPC;
711 p = xdr_encode_hyper(p, cb->initiator_addrtype);
712
713 /* cb->initiator_address */
714 err = gssx_enc_buffer(xdr, &cb->initiator_address);
715 if (err)
716 return err;
717
718 /* cb->acceptor_addrtype */
719 p = xdr_reserve_space(xdr, 8);
720 if (!p)
721 return -ENOSPC;
722 p = xdr_encode_hyper(p, cb->acceptor_addrtype);
723
724 /* cb->acceptor_address */
725 err = gssx_enc_buffer(xdr, &cb->acceptor_address);
726 if (err)
727 return err;
728
729 /* cb->application_data */
730 err = gssx_enc_buffer(xdr, &cb->application_data);
731
732 return err;
733}
734
735void gssx_enc_accept_sec_context(struct rpc_rqst *req,
736 struct xdr_stream *xdr,
737 struct gssx_arg_accept_sec_context *arg)
738{
739 int err;
740
741 err = gssx_enc_call_ctx(xdr, &arg->call_ctx);
742 if (err)
743 goto done;
744
745 /* arg->context_handle */
746 if (arg->context_handle) {
747 err = gssx_enc_ctx(xdr, arg->context_handle);
748 if (err)
749 goto done;
750 } else {
751 err = gssx_enc_bool(xdr, 0);
752 }
753
754 /* arg->cred_handle */
755 if (arg->cred_handle) {
756 err = gssx_enc_cred(xdr, arg->cred_handle);
757 if (err)
758 goto done;
759 } else {
760 err = gssx_enc_bool(xdr, 0);
761 }
762
763 /* arg->input_token */
764 err = gssx_enc_in_token(xdr, &arg->input_token);
765 if (err)
766 goto done;
767
768 /* arg->input_cb */
769 if (arg->input_cb) {
770 err = gssx_enc_cb(xdr, arg->input_cb);
771 if (err)
772 goto done;
773 } else {
774 err = gssx_enc_bool(xdr, 0);
775 }
776
777 err = gssx_enc_bool(xdr, arg->ret_deleg_cred);
778 if (err)
779 goto done;
780
781 /* leave options empty for now, will add once we have any options
782 * to pass up at all */
783 /* arg->options */
784 err = dummy_enc_opt_array(xdr, &arg->options);
785
786done:
787 if (err)
788 dprintk("RPC: gssx_enc_accept_sec_context: %d\n", err);
789}
790
791int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp,
792 struct xdr_stream *xdr,
793 struct gssx_res_accept_sec_context *res)
794{
795 u32 value_follows;
796 int err;
797
798 /* res->status */
799 err = gssx_dec_status(xdr, &res->status);
800 if (err)
801 return err;
802
803 /* res->context_handle */
804 err = gssx_dec_bool(xdr, &value_follows);
805 if (err)
806 return err;
807 if (value_follows) {
808 err = gssx_dec_ctx(xdr, res->context_handle);
809 if (err)
810 return err;
811 } else {
812 res->context_handle = NULL;
813 }
814
815 /* res->output_token */
816 err = gssx_dec_bool(xdr, &value_follows);
817 if (err)
818 return err;
819 if (value_follows) {
820 err = gssx_dec_buffer(xdr, res->output_token);
821 if (err)
822 return err;
823 } else {
824 res->output_token = NULL;
825 }
826
827 /* res->delegated_cred_handle */
828 err = gssx_dec_bool(xdr, &value_follows);
829 if (err)
830 return err;
831 if (value_follows) {
832 /* we do not support upcall servers sending this data. */
833 return -EINVAL;
834 }
835
836 /* res->options */
837 err = gssx_dec_option_array(xdr, &res->options);
838
839 return err;
840}
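
gssx_enc_buffer() above delegates the wire format to xdr_reserve_space() and xdr_encode_opaque(). As a standalone sketch of what those produce for an XDR opaque, assuming a hypothetical helper name xdr_put_opaque(): a 4-byte big-endian length, the payload bytes, then zero padding up to the next 4-byte boundary.

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static size_t xdr_put_opaque(uint8_t *out, const void *data, uint32_t len)
{
	uint32_t be_len = htonl(len);
	size_t pad = (4 - (len & 3)) & 3;

	memcpy(out, &be_len, 4);		/* big-endian length */
	memcpy(out + 4, data, len);		/* payload */
	memset(out + 4 + len, 0, pad);		/* pad to 4-byte boundary */
	return 4 + len + pad;			/* always a multiple of 4 */
}

int main(void)
{
	uint8_t buf[16];
	size_t i, n = xdr_put_opaque(buf, "abcde", 5);

	for (i = 0; i < n; i++)
		printf("%02x ", buf[i]);
	printf("\n");	/* 00 00 00 05 61 62 63 64 65 00 00 00 */
	return 0;
}
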
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.h b/net/sunrpc/auth_gss/gss_rpc_xdr.h
new file mode 100644
index 000000000000..1c98b27d870c
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.h
@@ -0,0 +1,264 @@
1/*
2 * GSS Proxy upcall module
3 *
4 * Copyright (C) 2012 Simo Sorce <simo@redhat.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 */
20
21#ifndef _LINUX_GSS_RPC_XDR_H
22#define _LINUX_GSS_RPC_XDR_H
23
24#include <linux/sunrpc/xdr.h>
25#include <linux/sunrpc/clnt.h>
26#include <linux/sunrpc/xprtsock.h>
27
28#ifdef RPC_DEBUG
29# define RPCDBG_FACILITY RPCDBG_AUTH
30#endif
31
32#define LUCID_OPTION "exported_context_type"
33#define LUCID_VALUE "linux_lucid_v1"
34#define CREDS_OPTION "exported_creds_type"
35#define CREDS_VALUE "linux_creds_v1"
36
37typedef struct xdr_netobj gssx_buffer;
38typedef struct xdr_netobj utf8string;
39typedef struct xdr_netobj gssx_OID;
40
41enum gssx_cred_usage {
42 GSSX_C_INITIATE = 1,
43 GSSX_C_ACCEPT = 2,
44 GSSX_C_BOTH = 3,
45};
46
47struct gssx_option {
48 gssx_buffer option;
49 gssx_buffer value;
50};
51
52struct gssx_option_array {
53 u32 count;
54 struct gssx_option *data;
55};
56
57struct gssx_status {
58 u64 major_status;
59 gssx_OID mech;
60 u64 minor_status;
61 utf8string major_status_string;
62 utf8string minor_status_string;
63 gssx_buffer server_ctx;
64 struct gssx_option_array options;
65};
66
67struct gssx_call_ctx {
68 utf8string locale;
69 gssx_buffer server_ctx;
70 struct gssx_option_array options;
71};
72
73struct gssx_name_attr {
74 gssx_buffer attr;
75 gssx_buffer value;
76 struct gssx_option_array extensions;
77};
78
79struct gssx_name_attr_array {
80 u32 count;
81 struct gssx_name_attr *data;
82};
83
84struct gssx_name {
85 gssx_buffer display_name;
86};
87typedef struct gssx_name gssx_name;
88
89struct gssx_cred_element {
90 gssx_name MN;
91 gssx_OID mech;
92 u32 cred_usage;
93 u64 initiator_time_rec;
94 u64 acceptor_time_rec;
95 struct gssx_option_array options;
96};
97
98struct gssx_cred_element_array {
99 u32 count;
100 struct gssx_cred_element *data;
101};
102
103struct gssx_cred {
104 gssx_name desired_name;
105 struct gssx_cred_element_array elements;
106 gssx_buffer cred_handle_reference;
107 u32 needs_release;
108};
109
110struct gssx_ctx {
111 gssx_buffer exported_context_token;
112 gssx_buffer state;
113 u32 need_release;
114 gssx_OID mech;
115 gssx_name src_name;
116 gssx_name targ_name;
117 u64 lifetime;
118 u64 ctx_flags;
119 u32 locally_initiated;
120 u32 open;
121 struct gssx_option_array options;
122};
123
124struct gssx_cb {
125 u64 initiator_addrtype;
126 gssx_buffer initiator_address;
127 u64 acceptor_addrtype;
128 gssx_buffer acceptor_address;
129 gssx_buffer application_data;
130};
131
132
133/* This structure is not defined in the protocol.
134 * It is used in the kernel to carry around a big buffer
135 * as a set of pages */
136struct gssp_in_token {
137 struct page **pages; /* Array of contiguous pages */
138 unsigned int page_base; /* Start of page data */
139 unsigned int page_len; /* Length of page data */
140};
141
142struct gssx_arg_accept_sec_context {
143 struct gssx_call_ctx call_ctx;
144 struct gssx_ctx *context_handle;
145 struct gssx_cred *cred_handle;
146 struct gssp_in_token input_token;
147 struct gssx_cb *input_cb;
148 u32 ret_deleg_cred;
149 struct gssx_option_array options;
150};
151
152struct gssx_res_accept_sec_context {
153 struct gssx_status status;
154 struct gssx_ctx *context_handle;
155 gssx_buffer *output_token;
156 /* struct gssx_cred *delegated_cred_handle; not used in kernel */
157 struct gssx_option_array options;
158};
159
160
161
162#define gssx_enc_indicate_mechs NULL
163#define gssx_dec_indicate_mechs NULL
164#define gssx_enc_get_call_context NULL
165#define gssx_dec_get_call_context NULL
166#define gssx_enc_import_and_canon_name NULL
167#define gssx_dec_import_and_canon_name NULL
168#define gssx_enc_export_cred NULL
169#define gssx_dec_export_cred NULL
170#define gssx_enc_import_cred NULL
171#define gssx_dec_import_cred NULL
172#define gssx_enc_acquire_cred NULL
173#define gssx_dec_acquire_cred NULL
174#define gssx_enc_store_cred NULL
175#define gssx_dec_store_cred NULL
176#define gssx_enc_init_sec_context NULL
177#define gssx_dec_init_sec_context NULL
178void gssx_enc_accept_sec_context(struct rpc_rqst *req,
179 struct xdr_stream *xdr,
180 struct gssx_arg_accept_sec_context *args);
181int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp,
182 struct xdr_stream *xdr,
183 struct gssx_res_accept_sec_context *res);
184#define gssx_enc_release_handle NULL
185#define gssx_dec_release_handle NULL
186#define gssx_enc_get_mic NULL
187#define gssx_dec_get_mic NULL
188#define gssx_enc_verify NULL
189#define gssx_dec_verify NULL
190#define gssx_enc_wrap NULL
191#define gssx_dec_wrap NULL
192#define gssx_enc_unwrap NULL
193#define gssx_dec_unwrap NULL
194#define gssx_enc_wrap_size_limit NULL
195#define gssx_dec_wrap_size_limit NULL
196
197/* non-implemented calls are set to 0 size */
198#define GSSX_ARG_indicate_mechs_sz 0
199#define GSSX_RES_indicate_mechs_sz 0
200#define GSSX_ARG_get_call_context_sz 0
201#define GSSX_RES_get_call_context_sz 0
202#define GSSX_ARG_import_and_canon_name_sz 0
203#define GSSX_RES_import_and_canon_name_sz 0
204#define GSSX_ARG_export_cred_sz 0
205#define GSSX_RES_export_cred_sz 0
206#define GSSX_ARG_import_cred_sz 0
207#define GSSX_RES_import_cred_sz 0
208#define GSSX_ARG_acquire_cred_sz 0
209#define GSSX_RES_acquire_cred_sz 0
210#define GSSX_ARG_store_cred_sz 0
211#define GSSX_RES_store_cred_sz 0
212#define GSSX_ARG_init_sec_context_sz 0
213#define GSSX_RES_init_sec_context_sz 0
214
215#define GSSX_default_in_call_ctx_sz (4 + 4 + 4 + \
216 8 + sizeof(LUCID_OPTION) + sizeof(LUCID_VALUE) + \
217 8 + sizeof(CREDS_OPTION) + sizeof(CREDS_VALUE))
218#define GSSX_default_in_ctx_hndl_sz (4 + 4+8 + 4 + 4 + 6*4 + 6*4 + 8 + 8 + \
219 4 + 4 + 4)
220#define GSSX_default_in_cred_sz 4 /* we send in no cred_handle */
221#define GSSX_default_in_token_sz 4 /* does *not* include token data */
222#define GSSX_default_in_cb_sz 4 /* we do not use channel bindings */
223#define GSSX_ARG_accept_sec_context_sz (GSSX_default_in_call_ctx_sz + \
224 GSSX_default_in_ctx_hndl_sz + \
225 GSSX_default_in_cred_sz + \
226 GSSX_default_in_token_sz + \
227 GSSX_default_in_cb_sz + \
228 4 /* no deleg creds boolean */ + \
229 4) /* empty options */
230
231/* somewhat arbitrary numbers but large enough (we ignore some of the data
232 * sent down, but it is part of the protocol so we need enough space to take
233 * it in) */
234#define GSSX_default_status_sz 8 + 24 + 8 + 256 + 256 + 16 + 4
235#define GSSX_max_output_handle_sz 128
236#define GSSX_max_oid_sz 16
237#define GSSX_max_princ_sz 256
238#define GSSX_default_ctx_sz (GSSX_max_output_handle_sz + \
239 16 + 4 + GSSX_max_oid_sz + \
240 2 * GSSX_max_princ_sz + \
241 8 + 8 + 4 + 4 + 4)
242#define GSSX_max_output_token_sz 1024
243#define GSSX_max_creds_sz (4 + 4 + 4 + NGROUPS_MAX * 4)
244#define GSSX_RES_accept_sec_context_sz (GSSX_default_status_sz + \
245 GSSX_default_ctx_sz + \
246 GSSX_max_output_token_sz + \
247 4 + GSSX_max_creds_sz)
248
249#define GSSX_ARG_release_handle_sz 0
250#define GSSX_RES_release_handle_sz 0
251#define GSSX_ARG_get_mic_sz 0
252#define GSSX_RES_get_mic_sz 0
253#define GSSX_ARG_verify_sz 0
254#define GSSX_RES_verify_sz 0
255#define GSSX_ARG_wrap_sz 0
256#define GSSX_RES_wrap_sz 0
257#define GSSX_ARG_unwrap_sz 0
258#define GSSX_RES_unwrap_sz 0
259#define GSSX_ARG_wrap_size_limit_sz 0
260#define GSSX_RES_wrap_size_limit_sz 0
261
262
263
264#endif /* _LINUX_GSS_RPC_XDR_H */
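
GSSX_max_creds_sz above (4 + 4 + 4 + NGROUPS_MAX * 4) mirrors the "linux_creds_v1" payload that gssx_dec_linux_creds() in gss_rpc_xdr.c walks with get_s32(): a uid, a gid, a group count, then that many gids. Note the decoder copies each field with memcpy, so the values travel in the sender's native byte order inside the enclosing XDR buffer, not in XDR order. A sketch of the sending side under that assumption; pack_creds() is an illustrative name, not part of the protocol code.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static size_t pack_creds(uint8_t *out, int32_t uid, int32_t gid,
			 const int32_t *groups, int32_t ngroups)
{
	size_t off = 0;
	int32_t i;

	memcpy(out + off, &uid, 4); off += 4;		/* uid */
	memcpy(out + off, &gid, 4); off += 4;		/* gid */
	memcpy(out + off, &ngroups, 4); off += 4;	/* number of gids */
	for (i = 0; i < ngroups; i++) {			/* the gids */
		memcpy(out + off, &groups[i], 4);
		off += 4;
	}
	return off;	/* becomes the length of the enclosing XDR opaque */
}

int main(void)
{
	int32_t groups[2] = { 100, 101 };
	uint8_t buf[64];

	printf("%zu bytes\n", pack_creds(buf, 1000, 1000, groups, 2)); /* 20 */
	return 0;
}
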
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index c3ba570222dc..871c73c92165 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -48,8 +48,8 @@
48#include <linux/sunrpc/svcauth.h> 48#include <linux/sunrpc/svcauth.h>
49#include <linux/sunrpc/svcauth_gss.h> 49#include <linux/sunrpc/svcauth_gss.h>
50#include <linux/sunrpc/cache.h> 50#include <linux/sunrpc/cache.h>
51#include "gss_rpc_upcall.h"
51 52
52#include "../netns.h"
53 53
54#ifdef RPC_DEBUG 54#ifdef RPC_DEBUG
55# define RPCDBG_FACILITY RPCDBG_AUTH 55# define RPCDBG_FACILITY RPCDBG_AUTH
@@ -497,7 +497,8 @@ static int rsc_parse(struct cache_detail *cd,
497 len = qword_get(&mesg, buf, mlen); 497 len = qword_get(&mesg, buf, mlen);
498 if (len < 0) 498 if (len < 0)
499 goto out; 499 goto out;
500 status = gss_import_sec_context(buf, len, gm, &rsci.mechctx, GFP_KERNEL); 500 status = gss_import_sec_context(buf, len, gm, &rsci.mechctx,
501 NULL, GFP_KERNEL);
501 if (status) 502 if (status)
502 goto out; 503 goto out;
503 504
@@ -505,8 +506,10 @@ static int rsc_parse(struct cache_detail *cd,
505 len = qword_get(&mesg, buf, mlen); 506 len = qword_get(&mesg, buf, mlen);
506 if (len > 0) { 507 if (len > 0) {
507 rsci.cred.cr_principal = kstrdup(buf, GFP_KERNEL); 508 rsci.cred.cr_principal = kstrdup(buf, GFP_KERNEL);
508 if (!rsci.cred.cr_principal) 509 if (!rsci.cred.cr_principal) {
510 status = -ENOMEM;
509 goto out; 511 goto out;
512 }
510 } 513 }
511 514
512 } 515 }
@@ -987,13 +990,10 @@ gss_write_init_verf(struct cache_detail *cd, struct svc_rqst *rqstp,
987} 990}
988 991
989static inline int 992static inline int
990gss_read_verf(struct rpc_gss_wire_cred *gc, 993gss_read_common_verf(struct rpc_gss_wire_cred *gc,
991 struct kvec *argv, __be32 *authp, 994 struct kvec *argv, __be32 *authp,
992 struct xdr_netobj *in_handle, 995 struct xdr_netobj *in_handle)
993 struct xdr_netobj *in_token)
994{ 996{
995 struct xdr_netobj tmpobj;
996
997 /* Read the verifier; should be NULL: */ 997 /* Read the verifier; should be NULL: */
998 *authp = rpc_autherr_badverf; 998 *authp = rpc_autherr_badverf;
999 if (argv->iov_len < 2 * 4) 999 if (argv->iov_len < 2 * 4)
@@ -1009,6 +1009,23 @@ gss_read_verf(struct rpc_gss_wire_cred *gc,
1009 if (dup_netobj(in_handle, &gc->gc_ctx)) 1009 if (dup_netobj(in_handle, &gc->gc_ctx))
1010 return SVC_CLOSE; 1010 return SVC_CLOSE;
1011 *authp = rpc_autherr_badverf; 1011 *authp = rpc_autherr_badverf;
1012
1013 return 0;
1014}
1015
1016static inline int
1017gss_read_verf(struct rpc_gss_wire_cred *gc,
1018 struct kvec *argv, __be32 *authp,
1019 struct xdr_netobj *in_handle,
1020 struct xdr_netobj *in_token)
1021{
1022 struct xdr_netobj tmpobj;
1023 int res;
1024
1025 res = gss_read_common_verf(gc, argv, authp, in_handle);
1026 if (res)
1027 return res;
1028
1012 if (svc_safe_getnetobj(argv, &tmpobj)) { 1029 if (svc_safe_getnetobj(argv, &tmpobj)) {
1013 kfree(in_handle->data); 1030 kfree(in_handle->data);
1014 return SVC_DENIED; 1031 return SVC_DENIED;
@@ -1021,6 +1038,40 @@ gss_read_verf(struct rpc_gss_wire_cred *gc,
1021 return 0; 1038 return 0;
1022} 1039}
1023 1040
1041/* Ok, this relies heavily on a set of semantics in
1042 * how rqstp is set up by svc_recv and pages laid down by the
1043 * server when reading a request. We are basically guaranteed that
1044 * the token lies linearly across a set of pages, starting
1045 * at iov_base in rq_arg.head[0] which happens to be the first of a
1046 * set of pages stored in rq_pages[].
1047 * rq_arg.head[0].iov_base will provide us the page_base to pass
1048 * to the upcall.
1049 */
1050static inline int
1051gss_read_proxy_verf(struct svc_rqst *rqstp,
1052 struct rpc_gss_wire_cred *gc, __be32 *authp,
1053 struct xdr_netobj *in_handle,
1054 struct gssp_in_token *in_token)
1055{
1056 struct kvec *argv = &rqstp->rq_arg.head[0];
1057 u32 inlen;
1058 int res;
1059
1060 res = gss_read_common_verf(gc, argv, authp, in_handle);
1061 if (res)
1062 return res;
1063
1064 inlen = svc_getnl(argv);
1065 if (inlen > (argv->iov_len + rqstp->rq_arg.page_len))
1066 return SVC_DENIED;
1067
1068 in_token->pages = rqstp->rq_pages;
1069 in_token->page_base = (ulong)argv->iov_base & ~PAGE_MASK;
1070 in_token->page_len = inlen;
1071
1072 return 0;
1073}
1074
1024static inline int 1075static inline int
1025gss_write_resv(struct kvec *resv, size_t size_limit, 1076gss_write_resv(struct kvec *resv, size_t size_limit,
1026 struct xdr_netobj *out_handle, struct xdr_netobj *out_token, 1077 struct xdr_netobj *out_handle, struct xdr_netobj *out_token,
@@ -1048,7 +1099,7 @@ gss_write_resv(struct kvec *resv, size_t size_limit,
1048 * the upcall results are available, write the verifier and result. 1099 * the upcall results are available, write the verifier and result.
1049 * Otherwise, drop the request pending an answer to the upcall. 1100 * Otherwise, drop the request pending an answer to the upcall.
1050 */ 1101 */
1051static int svcauth_gss_handle_init(struct svc_rqst *rqstp, 1102static int svcauth_gss_legacy_init(struct svc_rqst *rqstp,
1052 struct rpc_gss_wire_cred *gc, __be32 *authp) 1103 struct rpc_gss_wire_cred *gc, __be32 *authp)
1053{ 1104{
1054 struct kvec *argv = &rqstp->rq_arg.head[0]; 1105 struct kvec *argv = &rqstp->rq_arg.head[0];
@@ -1088,6 +1139,287 @@ out:
1088 return ret; 1139 return ret;
1089} 1140}
1090 1141
1142static int gss_proxy_save_rsc(struct cache_detail *cd,
1143 struct gssp_upcall_data *ud,
1144 uint64_t *handle)
1145{
1146 struct rsc rsci, *rscp = NULL;
1147 static atomic64_t ctxhctr;
1148 long long ctxh;
1149 struct gss_api_mech *gm = NULL;
1150 time_t expiry;
1151 int status = -EINVAL;
1152
1153 memset(&rsci, 0, sizeof(rsci));
1154 /* context handle */
1155 status = -ENOMEM;
1156 /* the handle needs to be just a unique id,
1157 * use a static counter */
1158 ctxh = atomic64_inc_return(&ctxhctr);
1159
1160 /* make a copy for the caller */
1161 *handle = ctxh;
1162
1163 /* make a copy for the rsc cache */
1164 if (dup_to_netobj(&rsci.handle, (char *)handle, sizeof(uint64_t)))
1165 goto out;
1166 rscp = rsc_lookup(cd, &rsci);
1167 if (!rscp)
1168 goto out;
1169
1170 /* creds */
1171 if (!ud->found_creds) {
1172		/* userspace seems buggy, we should always get at least a
1173 * mapping to nobody */
1174 dprintk("RPC: No creds found, marking Negative!\n");
1175 set_bit(CACHE_NEGATIVE, &rsci.h.flags);
1176 } else {
1177
1178 /* steal creds */
1179 rsci.cred = ud->creds;
1180 memset(&ud->creds, 0, sizeof(struct svc_cred));
1181
1182 status = -EOPNOTSUPP;
1183 /* get mech handle from OID */
1184 gm = gss_mech_get_by_OID(&ud->mech_oid);
1185 if (!gm)
1186 goto out;
1187
1188 status = -EINVAL;
1189 /* mech-specific data: */
1190 status = gss_import_sec_context(ud->out_handle.data,
1191 ud->out_handle.len,
1192 gm, &rsci.mechctx,
1193 &expiry, GFP_KERNEL);
1194 if (status)
1195 goto out;
1196 }
1197
1198 rsci.h.expiry_time = expiry;
1199 rscp = rsc_update(cd, &rsci, rscp);
1200 status = 0;
1201out:
1202 gss_mech_put(gm);
1203 rsc_free(&rsci);
1204 if (rscp)
1205 cache_put(&rscp->h, cd);
1206 else
1207 status = -ENOMEM;
1208 return status;
1209}
1210
1211static int svcauth_gss_proxy_init(struct svc_rqst *rqstp,
1212 struct rpc_gss_wire_cred *gc, __be32 *authp)
1213{
1214 struct kvec *resv = &rqstp->rq_res.head[0];
1215 struct xdr_netobj cli_handle;
1216 struct gssp_upcall_data ud;
1217 uint64_t handle;
1218 int status;
1219 int ret;
1220 struct net *net = rqstp->rq_xprt->xpt_net;
1221 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
1222
1223 memset(&ud, 0, sizeof(ud));
1224 ret = gss_read_proxy_verf(rqstp, gc, authp,
1225 &ud.in_handle, &ud.in_token);
1226 if (ret)
1227 return ret;
1228
1229 ret = SVC_CLOSE;
1230
1231 /* Perform synchronous upcall to gss-proxy */
1232 status = gssp_accept_sec_context_upcall(net, &ud);
1233 if (status)
1234 goto out;
1235
1236 dprintk("RPC: svcauth_gss: gss major status = %d\n",
1237 ud.major_status);
1238
1239 switch (ud.major_status) {
1240 case GSS_S_CONTINUE_NEEDED:
1241 cli_handle = ud.out_handle;
1242 break;
1243 case GSS_S_COMPLETE:
1244 status = gss_proxy_save_rsc(sn->rsc_cache, &ud, &handle);
1245 if (status)
1246 goto out;
1247 cli_handle.data = (u8 *)&handle;
1248 cli_handle.len = sizeof(handle);
1249 break;
1250 default:
1251 ret = SVC_CLOSE;
1252 goto out;
1253 }
1254
1255 /* Got an answer to the upcall; use it: */
1256 if (gss_write_init_verf(sn->rsc_cache, rqstp,
1257 &cli_handle, &ud.major_status))
1258 goto out;
1259 if (gss_write_resv(resv, PAGE_SIZE,
1260 &cli_handle, &ud.out_token,
1261 ud.major_status, ud.minor_status))
1262 goto out;
1263
1264 ret = SVC_COMPLETE;
1265out:
1266 gssp_free_upcall_data(&ud);
1267 return ret;
1268}
1269
1270DEFINE_SPINLOCK(use_gssp_lock);
1271
1272static bool use_gss_proxy(struct net *net)
1273{
1274 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
1275
1276 if (sn->use_gss_proxy != -1)
1277 return sn->use_gss_proxy;
1278 spin_lock(&use_gssp_lock);
1279 /*
1280 * If you wanted gss-proxy, you should have said so before
1281 * starting to accept requests:
1282 */
1283 sn->use_gss_proxy = 0;
1284 spin_unlock(&use_gssp_lock);
1285 return 0;
1286}
1287
1288#ifdef CONFIG_PROC_FS
1289
1290static int set_gss_proxy(struct net *net, int type)
1291{
1292 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
1293 int ret = 0;
1294
1295 WARN_ON_ONCE(type != 0 && type != 1);
1296 spin_lock(&use_gssp_lock);
1297 if (sn->use_gss_proxy == -1 || sn->use_gss_proxy == type)
1298 sn->use_gss_proxy = type;
1299 else
1300 ret = -EBUSY;
1301 spin_unlock(&use_gssp_lock);
1302 wake_up(&sn->gssp_wq);
1303 return ret;
1304}
1305
1306static inline bool gssp_ready(struct sunrpc_net *sn)
1307{
1308 switch (sn->use_gss_proxy) {
1309 case -1:
1310 return false;
1311 case 0:
1312 return true;
1313 case 1:
1314 return sn->gssp_clnt;
1315 }
1316 WARN_ON_ONCE(1);
1317 return false;
1318}
1319
1320static int wait_for_gss_proxy(struct net *net)
1321{
1322 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
1323
1324 return wait_event_interruptible(sn->gssp_wq, gssp_ready(sn));
1325}
1326
1327
1328static ssize_t write_gssp(struct file *file, const char __user *buf,
1329 size_t count, loff_t *ppos)
1330{
1331 struct net *net = PDE_DATA(file->f_path.dentry->d_inode);
1332 char tbuf[20];
1333 unsigned long i;
1334 int res;
1335
1336 if (*ppos || count > sizeof(tbuf)-1)
1337 return -EINVAL;
1338 if (copy_from_user(tbuf, buf, count))
1339 return -EFAULT;
1340
1341 tbuf[count] = 0;
1342 res = kstrtoul(tbuf, 0, &i);
1343 if (res)
1344 return res;
1345 if (i != 1)
1346 return -EINVAL;
1347 res = set_gss_proxy(net, 1);
1348 if (res)
1349 return res;
1350 res = set_gssp_clnt(net);
1351 if (res)
1352 return res;
1353 return count;
1354}
1355
1356static ssize_t read_gssp(struct file *file, char __user *buf,
1357 size_t count, loff_t *ppos)
1358{
1359 struct net *net = PDE_DATA(file->f_path.dentry->d_inode);
1360 unsigned long p = *ppos;
1361 char tbuf[10];
1362 size_t len;
1363 int ret;
1364
1365 ret = wait_for_gss_proxy(net);
1366 if (ret)
1367 return ret;
1368
1369 snprintf(tbuf, sizeof(tbuf), "%d\n", use_gss_proxy(net));
1370 len = strlen(tbuf);
1371 if (p >= len)
1372 return 0;
1373 len -= p;
1374 if (len > count)
1375 len = count;
1376 if (copy_to_user(buf, (void *)(tbuf+p), len))
1377 return -EFAULT;
1378 *ppos += len;
1379 return len;
1380}
1381
1382static const struct file_operations use_gss_proxy_ops = {
1383 .open = nonseekable_open,
1384 .write = write_gssp,
1385 .read = read_gssp,
1386};
1387
1388static int create_use_gss_proxy_proc_entry(struct net *net)
1389{
1390 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
1391 struct proc_dir_entry **p = &sn->use_gssp_proc;
1392
1393 sn->use_gss_proxy = -1;
1394 *p = proc_create_data("use-gss-proxy", S_IFREG|S_IRUSR|S_IWUSR,
1395 sn->proc_net_rpc,
1396 &use_gss_proxy_ops, net);
1397 if (!*p)
1398 return -ENOMEM;
1399 init_gssp_clnt(sn);
1400 return 0;
1401}
1402
1403static void destroy_use_gss_proxy_proc_entry(struct net *net)
1404{
1405 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
1406
1407 if (sn->use_gssp_proc) {
1408 remove_proc_entry("use-gss-proxy", sn->proc_net_rpc);
1409 clear_gssp_clnt(sn);
1410 }
1411}
1412#else /* CONFIG_PROC_FS */
1413
1414static int create_use_gss_proxy_proc_entry(struct net *net)
1415{
1416 return 0;
1417}
1418
1419static void destroy_use_gss_proxy_proc_entry(struct net *net) {}
1420
1421#endif /* CONFIG_PROC_FS */
1422
1091/* 1423/*
1092 * Accept an rpcsec packet. 1424 * Accept an rpcsec packet.
1093 * If context establishment, punt to user space 1425 * If context establishment, punt to user space
@@ -1154,7 +1486,10 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
1154 switch (gc->gc_proc) { 1486 switch (gc->gc_proc) {
1155 case RPC_GSS_PROC_INIT: 1487 case RPC_GSS_PROC_INIT:
1156 case RPC_GSS_PROC_CONTINUE_INIT: 1488 case RPC_GSS_PROC_CONTINUE_INIT:
1157 return svcauth_gss_handle_init(rqstp, gc, authp); 1489 if (use_gss_proxy(SVC_NET(rqstp)))
1490 return svcauth_gss_proxy_init(rqstp, gc, authp);
1491 else
1492 return svcauth_gss_legacy_init(rqstp, gc, authp);
1158 case RPC_GSS_PROC_DATA: 1493 case RPC_GSS_PROC_DATA:
1159 case RPC_GSS_PROC_DESTROY: 1494 case RPC_GSS_PROC_DESTROY:
1160 /* Look up the context, and check the verifier: */ 1495 /* Look up the context, and check the verifier: */
@@ -1531,7 +1866,12 @@ gss_svc_init_net(struct net *net)
1531 rv = rsi_cache_create_net(net); 1866 rv = rsi_cache_create_net(net);
1532 if (rv) 1867 if (rv)
1533 goto out1; 1868 goto out1;
1869 rv = create_use_gss_proxy_proc_entry(net);
1870 if (rv)
1871 goto out2;
1534 return 0; 1872 return 0;
1873out2:
1874 destroy_use_gss_proxy_proc_entry(net);
1535out1: 1875out1:
1536 rsc_cache_destroy_net(net); 1876 rsc_cache_destroy_net(net);
1537 return rv; 1877 return rv;
@@ -1540,6 +1880,7 @@ out1:
1540void 1880void
1541gss_svc_shutdown_net(struct net *net) 1881gss_svc_shutdown_net(struct net *net)
1542{ 1882{
1883 destroy_use_gss_proxy_proc_entry(net);
1543 rsi_cache_destroy_net(net); 1884 rsi_cache_destroy_net(net);
1544 rsc_cache_destroy_net(net); 1885 rsc_cache_destroy_net(net);
1545} 1886}
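
The use_gss_proxy field added above is a tri-state latch: -1 means "not yet decided", 0 means the legacy rpcsec upcall, 1 means gss-proxy. The first query from the accept path latches -1 to 0, and a writer may only set a value that is still unset or already equal. A toy single-threaded model of those semantics; the kernel serializes the transitions with use_gssp_lock, elided here.

#include <stdio.h>

static int use_gss_proxy_state = -1;	/* models sn->use_gss_proxy */

static int query_use_gss_proxy(void)
{
	if (use_gss_proxy_state == -1)
		use_gss_proxy_state = 0;	/* too late to opt in now */
	return use_gss_proxy_state;
}

static int set_gss_proxy_state(int type)
{
	if (use_gss_proxy_state == -1 || use_gss_proxy_state == type) {
		use_gss_proxy_state = type;
		return 0;
	}
	return -1;	/* the kernel returns -EBUSY here */
}

int main(void)
{
	printf("set 1: %d\n", set_gss_proxy_state(1));	/* 0: latched to 1 */
	printf("query: %d\n", query_use_gss_proxy());	/* 1 */
	printf("set 0: %d\n", set_gss_proxy_state(0));	/* -1: already decided */
	return 0;
}
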
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index f1889be80912..80fe5c86efd1 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -986,8 +986,10 @@ static int cache_open(struct inode *inode, struct file *filp,
986 nonseekable_open(inode, filp); 986 nonseekable_open(inode, filp);
987 if (filp->f_mode & FMODE_READ) { 987 if (filp->f_mode & FMODE_READ) {
988 rp = kmalloc(sizeof(*rp), GFP_KERNEL); 988 rp = kmalloc(sizeof(*rp), GFP_KERNEL);
989 if (!rp) 989 if (!rp) {
990 module_put(cd->owner);
990 return -ENOMEM; 991 return -ENOMEM;
992 }
991 rp->offset = 0; 993 rp->offset = 0;
992 rp->q.reader = 1; 994 rp->q.reader = 1;
993 atomic_inc(&cd->readers); 995 atomic_inc(&cd->readers);
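
The cache_open() hunk above balances the module reference on the allocation-failure path; before the fix, an -ENOMEM return leaked the reference taken earlier on the open path. The same shape in miniature, with get_ref()/put_ref() as stand-ins for try_module_get()/module_put().

#include <stdio.h>
#include <stdlib.h>

static int refcount;

static void get_ref(void) { refcount++; }
static void put_ref(void) { refcount--; }

static int open_reader(void)
{
	void *rp;

	get_ref();		/* taken before the allocation */
	rp = malloc(64);
	if (!rp) {
		put_ref();	/* the fix: release it on failure too */
		return -1;	/* -ENOMEM in the kernel */
	}
	free(rp);
	put_ref();
	return 0;
}

int main(void)
{
	open_reader();
	printf("refcount=%d\n", refcount);	/* 0: balanced either way */
	return 0;
}
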
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index d259fa966927..5a750b9c3640 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -360,7 +360,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru
360 360
361 auth = rpcauth_create(args->authflavor, clnt); 361 auth = rpcauth_create(args->authflavor, clnt);
362 if (IS_ERR(auth)) { 362 if (IS_ERR(auth)) {
363 printk(KERN_INFO "RPC: Couldn't create auth handle (flavor %u)\n", 363 dprintk("RPC: Couldn't create auth handle (flavor %u)\n",
364 args->authflavor); 364 args->authflavor);
365 err = PTR_ERR(auth); 365 err = PTR_ERR(auth);
366 goto out_no_auth; 366 goto out_no_auth;
@@ -413,6 +413,8 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
413 413
414 if (args->flags & RPC_CLNT_CREATE_INFINITE_SLOTS) 414 if (args->flags & RPC_CLNT_CREATE_INFINITE_SLOTS)
415 xprtargs.flags |= XPRT_CREATE_INFINITE_SLOTS; 415 xprtargs.flags |= XPRT_CREATE_INFINITE_SLOTS;
416 if (args->flags & RPC_CLNT_CREATE_NO_IDLE_TIMEOUT)
417 xprtargs.flags |= XPRT_CREATE_NO_IDLE_TIMEOUT;
416 /* 418 /*
417 * If the caller chooses not to specify a hostname, whip 419 * If the caller chooses not to specify a hostname, whip
418 * up a string representation of the passed-in address. 420 * up a string representation of the passed-in address.
@@ -681,6 +683,7 @@ rpc_release_client(struct rpc_clnt *clnt)
681 if (atomic_dec_and_test(&clnt->cl_count)) 683 if (atomic_dec_and_test(&clnt->cl_count))
682 rpc_free_auth(clnt); 684 rpc_free_auth(clnt);
683} 685}
686EXPORT_SYMBOL_GPL(rpc_release_client);
684 687
685/** 688/**
686 * rpc_bind_new_program - bind a new RPC program to an existing client 689 * rpc_bind_new_program - bind a new RPC program to an existing client
diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h
index ce7bd449173d..7111a4c9113b 100644
--- a/net/sunrpc/netns.h
+++ b/net/sunrpc/netns.h
@@ -23,6 +23,12 @@ struct sunrpc_net {
23 struct rpc_clnt *rpcb_local_clnt4; 23 struct rpc_clnt *rpcb_local_clnt4;
24 spinlock_t rpcb_clnt_lock; 24 spinlock_t rpcb_clnt_lock;
25 unsigned int rpcb_users; 25 unsigned int rpcb_users;
26
27 struct mutex gssp_lock;
28 wait_queue_head_t gssp_wq;
29 struct rpc_clnt *gssp_clnt;
30 int use_gss_proxy;
31 struct proc_dir_entry *use_gssp_proc;
26}; 32};
27 33
28extern int sunrpc_net_id; 34extern int sunrpc_net_id;
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 745fca3cfd36..095363eee764 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1300,6 +1300,8 @@ found:
1300 -PTR_ERR(xprt)); 1300 -PTR_ERR(xprt));
1301 goto out; 1301 goto out;
1302 } 1302 }
1303 if (args->flags & XPRT_CREATE_NO_IDLE_TIMEOUT)
1304 xprt->idle_timeout = 0;
1303 INIT_WORK(&xprt->task_cleanup, xprt_autoclose); 1305 INIT_WORK(&xprt->task_cleanup, xprt_autoclose);
1304 if (xprt_has_timer(xprt)) 1306 if (xprt_has_timer(xprt))
1305 setup_timer(&xprt->timer, xprt_init_autodisconnect, 1307 setup_timer(&xprt->timer, xprt_init_autodisconnect,
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 9c2825827dec..ffd50348a509 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2655,6 +2655,9 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
2655 } 2655 }
2656 xprt_set_bound(xprt); 2656 xprt_set_bound(xprt);
2657 xs_format_peer_addresses(xprt, "local", RPCBIND_NETID_LOCAL); 2657 xs_format_peer_addresses(xprt, "local", RPCBIND_NETID_LOCAL);
2658 ret = ERR_PTR(xs_local_setup_socket(transport));
2659 if (ret)
2660 goto out_err;
2658 break; 2661 break;
2659 default: 2662 default:
2660 ret = ERR_PTR(-EAFNOSUPPORT); 2663 ret = ERR_PTR(-EAFNOSUPPORT);
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 25e159c2feb4..e5f3da507823 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -584,8 +584,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
584{ 584{
585 int bp_index; 585 int bp_index;
586 586
587 /* 587 /* Prepare broadcast link message for reliable transmission,
588 * Prepare broadcast link message for reliable transmission,
589 * if first time trying to send it; 588 * if first time trying to send it;
590 * preparation is skipped for broadcast link protocol messages 589 * preparation is skipped for broadcast link protocol messages
591 * since they are sent in an unreliable manner and don't need it 590 * since they are sent in an unreliable manner and don't need it
@@ -611,30 +610,43 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
611 for (bp_index = 0; bp_index < MAX_BEARERS; bp_index++) { 610 for (bp_index = 0; bp_index < MAX_BEARERS; bp_index++) {
612 struct tipc_bearer *p = bcbearer->bpairs[bp_index].primary; 611 struct tipc_bearer *p = bcbearer->bpairs[bp_index].primary;
613 struct tipc_bearer *s = bcbearer->bpairs[bp_index].secondary; 612 struct tipc_bearer *s = bcbearer->bpairs[bp_index].secondary;
613 struct tipc_bearer *b = p;
614 struct sk_buff *tbuf;
614 615
615 if (!p) 616 if (!p)
616 break; /* no more bearers to try */ 617 break; /* No more bearers to try */
618
619 if (tipc_bearer_blocked(p)) {
620 if (!s || tipc_bearer_blocked(s))
621 continue; /* Can't use either bearer */
622 b = s;
623 }
617 624
618 tipc_nmap_diff(&bcbearer->remains, &p->nodes, &bcbearer->remains_new); 625 tipc_nmap_diff(&bcbearer->remains, &b->nodes,
626 &bcbearer->remains_new);
619 if (bcbearer->remains_new.count == bcbearer->remains.count) 627 if (bcbearer->remains_new.count == bcbearer->remains.count)
620 continue; /* bearer pair doesn't add anything */ 628 continue; /* Nothing added by bearer pair */
621 629
622 if (!tipc_bearer_blocked(p)) 630 if (bp_index == 0) {
623 tipc_bearer_send(p, buf, &p->bcast_addr); 631 /* Use original buffer for first bearer */
624 else if (s && !tipc_bearer_blocked(s)) 632 tipc_bearer_send(b, buf, &b->bcast_addr);
625 /* unable to send on primary bearer */ 633 } else {
626 tipc_bearer_send(s, buf, &s->bcast_addr); 634 /* Avoid concurrent buffer access */
627 else 635 tbuf = pskb_copy(buf, GFP_ATOMIC);
628 /* unable to send on either bearer */ 636 if (!tbuf)
629 continue; 637 break;
638 tipc_bearer_send(b, tbuf, &b->bcast_addr);
639 kfree_skb(tbuf); /* Bearer keeps a clone */
640 }
630 641
642 /* Swap bearers for next packet */
631 if (s) { 643 if (s) {
632 bcbearer->bpairs[bp_index].primary = s; 644 bcbearer->bpairs[bp_index].primary = s;
633 bcbearer->bpairs[bp_index].secondary = p; 645 bcbearer->bpairs[bp_index].secondary = p;
634 } 646 }
635 647
636 if (bcbearer->remains_new.count == 0) 648 if (bcbearer->remains_new.count == 0)
637 break; /* all targets reached */ 649 break; /* All targets reached */
638 650
639 bcbearer->remains = bcbearer->remains_new; 651 bcbearer->remains = bcbearer->remains_new;
640 } 652 }
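
The reworked tipc_bcbearer_send() above lets the first usable bearer transmit the original buffer but hands every later bearer a private pskb_copy(), because a bearer keeps a clone of what it sends and two transmit paths must not touch the same buffer concurrently. A userspace analogue of the pattern, with strdup() standing in for pskb_copy() and bearer_send() for tipc_bearer_send(); both names are illustrative.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NBEARERS 3

static void bearer_send(int idx, char *buf)
{
	/* a real transmit path may modify the buffer in place */
	printf("bearer %d sends: %s\n", idx, buf);
}

static int broadcast(const char *msg)
{
	char *orig = strdup(msg);
	int i;

	if (!orig)
		return -1;
	for (i = 0; i < NBEARERS; i++) {
		if (i == 0) {
			/* use the original buffer for the first bearer */
			bearer_send(i, orig);
		} else {
			/* avoid concurrent buffer access */
			char *tbuf = strdup(orig);

			if (!tbuf)
				break;
			bearer_send(i, tbuf);
			free(tbuf);	/* bearer kept its own copy */
		}
	}
	free(orig);
	return 0;
}

int main(void)
{
	return broadcast("hello");
}
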
diff --git a/net/tipc/link.c b/net/tipc/link.c
index daa6080a2a0c..a80feee5197a 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -2306,8 +2306,11 @@ static int link_recv_changeover_msg(struct tipc_link **l_ptr,
2306 struct tipc_msg *tunnel_msg = buf_msg(tunnel_buf); 2306 struct tipc_msg *tunnel_msg = buf_msg(tunnel_buf);
2307 u32 msg_typ = msg_type(tunnel_msg); 2307 u32 msg_typ = msg_type(tunnel_msg);
2308 u32 msg_count = msg_msgcnt(tunnel_msg); 2308 u32 msg_count = msg_msgcnt(tunnel_msg);
2309 u32 bearer_id = msg_bearer_id(tunnel_msg);
2309 2310
2310 dest_link = (*l_ptr)->owner->links[msg_bearer_id(tunnel_msg)]; 2311 if (bearer_id >= MAX_BEARERS)
2312 goto exit;
2313 dest_link = (*l_ptr)->owner->links[bearer_id];
2311 if (!dest_link) 2314 if (!dest_link)
2312 goto exit; 2315 goto exit;
2313 if (dest_link == *l_ptr) { 2316 if (dest_link == *l_ptr) {
@@ -2521,14 +2524,16 @@ int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb,
2521 struct tipc_msg *imsg = (struct tipc_msg *)msg_data(fragm); 2524 struct tipc_msg *imsg = (struct tipc_msg *)msg_data(fragm);
2522 u32 msg_sz = msg_size(imsg); 2525 u32 msg_sz = msg_size(imsg);
2523 u32 fragm_sz = msg_data_sz(fragm); 2526 u32 fragm_sz = msg_data_sz(fragm);
2524 u32 exp_fragm_cnt = msg_sz/fragm_sz + !!(msg_sz % fragm_sz); 2527 u32 exp_fragm_cnt;
2525 u32 max = TIPC_MAX_USER_MSG_SIZE + NAMED_H_SIZE; 2528 u32 max = TIPC_MAX_USER_MSG_SIZE + NAMED_H_SIZE;
2529
2526 if (msg_type(imsg) == TIPC_MCAST_MSG) 2530 if (msg_type(imsg) == TIPC_MCAST_MSG)
2527 max = TIPC_MAX_USER_MSG_SIZE + MCAST_H_SIZE; 2531 max = TIPC_MAX_USER_MSG_SIZE + MCAST_H_SIZE;
2528 if (msg_size(imsg) > max) { 2532 if (fragm_sz == 0 || msg_size(imsg) > max) {
2529 kfree_skb(fbuf); 2533 kfree_skb(fbuf);
2530 return 0; 2534 return 0;
2531 } 2535 }
2536 exp_fragm_cnt = msg_sz / fragm_sz + !!(msg_sz % fragm_sz);
2532 pbuf = tipc_buf_acquire(msg_size(imsg)); 2537 pbuf = tipc_buf_acquire(msg_size(imsg));
2533 if (pbuf != NULL) { 2538 if (pbuf != NULL) {
2534 pbuf->next = *pending; 2539 pbuf->next = *pending;
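
The tipc_link_recv_fragment() fix above is a divide-by-zero guard: the ceiling division msg_sz / fragm_sz + !!(msg_sz % fragm_sz) is now evaluated only after fragm_sz has been checked, so a crafted fragment header with a zero fragment size is dropped instead of crashing. A standalone sketch of the same guard:

#include <stdio.h>

static int expected_fragments(unsigned int msg_sz, unsigned int fragm_sz)
{
	if (fragm_sz == 0)
		return -1;	/* malformed header: reject before dividing */
	/* ceiling division: whole fragments plus one for any remainder */
	return msg_sz / fragm_sz + !!(msg_sz % fragm_sz);
}

int main(void)
{
	printf("%d\n", expected_fragments(1000, 300));	/* 4 */
	printf("%d\n", expected_fragments(1000, 0));	/* -1, no crash */
	return 0;
}
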
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 6fb9d00a75dc..ab4ef72f0b1d 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -311,6 +311,19 @@ static struct xfrm_algo_desc aalg_list[] = {
311 .sadb_alg_maxbits = 128 311 .sadb_alg_maxbits = 128
312 } 312 }
313}, 313},
314{
315 /* rfc4494 */
316 .name = "cmac(aes)",
317
318 .uinfo = {
319 .auth = {
320 .icv_truncbits = 96,
321 .icv_fullbits = 128,
322 }
323 },
324
325 .pfkey_supported = 0,
326},
314}; 327};
315 328
316static struct xfrm_algo_desc ealg_list[] = { 329static struct xfrm_algo_desc ealg_list[] = {