summaryrefslogtreecommitdiffstats
path: root/net/ceph
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2015-07-02 14:35:00 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2015-07-02 14:35:00 -0400
commit 0c76c6ba246043bbc5c0f9620a0645ae78217421 (patch)
tree 644a4db58706c4e97478951f0a3a0087ddf26e5e /net/ceph
parent 8688d9540cc6e17df4cba71615e27f04e0378fe6 (diff)
parent 5a60e87603c4c533492c515b7f62578189b03c9c (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
Pull Ceph updates from Sage Weil:

 "We have a pile of bug fixes from Ilya, including a few patches that
  sync up the CRUSH code with the latest from userspace.

  There is also a long series from Zheng that fixes various issues with
  snapshots, inline data, and directory fsync, some simplification and
  improvement in the cap release code, and a rework of the caching of
  directory contents.

  To top it off there are a few small fixes and cleanups from Benoit and
  Hong"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (40 commits)
  rbd: use GFP_NOIO in rbd_obj_request_create()
  crush: fix a bug in tree bucket decode
  libceph: Fix ceph_tcp_sendpage()'s more boolean usage
  libceph: Remove spurious kunmap() of the zero page
  rbd: queue_depth map option
  rbd: store rbd_options in rbd_device
  rbd: terminate rbd_opts_tokens with Opt_err
  ceph: fix ceph_writepages_start()
  rbd: bump queue_max_segments
  ceph: rework dcache readdir
  crush: sync up with userspace
  crush: fix crash from invalid 'take' argument
  ceph: switch some GFP_NOFS memory allocation to GFP_KERNEL
  ceph: pre-allocate data structure that tracks caps flushing
  ceph: re-send flushing caps (which are revoked) in reconnect stage
  ceph: send TID of the oldest pending caps flush to MDS
  ceph: track pending caps flushing globally
  ceph: track pending caps flushing accurately
  libceph: fix wrong name "Ceph filesystem for Linux"
  ceph: fix directory fsync
  ...
Diffstat (limited to 'net/ceph')
-rw-r--r-- net/ceph/ceph_common.c 50
-rw-r--r-- net/ceph/crush/crush.c 13
-rw-r--r-- net/ceph/crush/crush_ln_table.h 32
-rw-r--r-- net/ceph/crush/hash.c 8
-rw-r--r-- net/ceph/crush/mapper.c 148
-rw-r--r-- net/ceph/messenger.c 3
-rw-r--r-- net/ceph/mon_client.c 13
-rw-r--r-- net/ceph/osd_client.c 42
-rw-r--r-- net/ceph/osdmap.c 2
-rw-r--r-- net/ceph/pagevec.c 5
10 files changed, 197 insertions, 119 deletions
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 79e8f71aef5b..cb7db320dd27 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -352,8 +352,8 @@ ceph_parse_options(char *options, const char *dev_name,
352 /* start with defaults */ 352 /* start with defaults */
353 opt->flags = CEPH_OPT_DEFAULT; 353 opt->flags = CEPH_OPT_DEFAULT;
354 opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; 354 opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
355 opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */ 355 opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT;
356 opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; /* seconds */ 356 opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;
357 357
358 /* get mon ip(s) */ 358 /* get mon ip(s) */
359 /* ip1[:port1][,ip2[:port2]...] */ 359 /* ip1[:port1][,ip2[:port2]...] */
@@ -439,13 +439,32 @@ ceph_parse_options(char *options, const char *dev_name,
439 pr_warn("ignoring deprecated osdtimeout option\n"); 439 pr_warn("ignoring deprecated osdtimeout option\n");
440 break; 440 break;
441 case Opt_osdkeepalivetimeout: 441 case Opt_osdkeepalivetimeout:
442 opt->osd_keepalive_timeout = intval; 442 /* 0 isn't well defined right now, reject it */
443 if (intval < 1 || intval > INT_MAX / 1000) {
444 pr_err("osdkeepalive out of range\n");
445 err = -EINVAL;
446 goto out;
447 }
448 opt->osd_keepalive_timeout =
449 msecs_to_jiffies(intval * 1000);
443 break; 450 break;
444 case Opt_osd_idle_ttl: 451 case Opt_osd_idle_ttl:
445 opt->osd_idle_ttl = intval; 452 /* 0 isn't well defined right now, reject it */
453 if (intval < 1 || intval > INT_MAX / 1000) {
454 pr_err("osd_idle_ttl out of range\n");
455 err = -EINVAL;
456 goto out;
457 }
458 opt->osd_idle_ttl = msecs_to_jiffies(intval * 1000);
446 break; 459 break;
447 case Opt_mount_timeout: 460 case Opt_mount_timeout:
448 opt->mount_timeout = intval; 461 /* 0 is "wait forever" (i.e. infinite timeout) */
462 if (intval < 0 || intval > INT_MAX / 1000) {
463 pr_err("mount_timeout out of range\n");
464 err = -EINVAL;
465 goto out;
466 }
467 opt->mount_timeout = msecs_to_jiffies(intval * 1000);
449 break; 468 break;
450 469
451 case Opt_share: 470 case Opt_share:
@@ -512,12 +531,14 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client)
512 seq_puts(m, "notcp_nodelay,"); 531 seq_puts(m, "notcp_nodelay,");
513 532
514 if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT) 533 if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
515 seq_printf(m, "mount_timeout=%d,", opt->mount_timeout); 534 seq_printf(m, "mount_timeout=%d,",
535 jiffies_to_msecs(opt->mount_timeout) / 1000);
516 if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT) 536 if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
517 seq_printf(m, "osd_idle_ttl=%d,", opt->osd_idle_ttl); 537 seq_printf(m, "osd_idle_ttl=%d,",
538 jiffies_to_msecs(opt->osd_idle_ttl) / 1000);
518 if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) 539 if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
519 seq_printf(m, "osdkeepalivetimeout=%d,", 540 seq_printf(m, "osdkeepalivetimeout=%d,",
520 opt->osd_keepalive_timeout); 541 jiffies_to_msecs(opt->osd_keepalive_timeout) / 1000);
521 542
522 /* drop redundant comma */ 543 /* drop redundant comma */
523 if (m->count != pos) 544 if (m->count != pos)
@@ -626,8 +647,8 @@ static int have_mon_and_osd_map(struct ceph_client *client)
626 */ 647 */
627int __ceph_open_session(struct ceph_client *client, unsigned long started) 648int __ceph_open_session(struct ceph_client *client, unsigned long started)
628{ 649{
629 int err; 650 unsigned long timeout = client->options->mount_timeout;
630 unsigned long timeout = client->options->mount_timeout * HZ; 651 long err;
631 652
632 /* open session, and wait for mon and osd maps */ 653 /* open session, and wait for mon and osd maps */
633 err = ceph_monc_open_session(&client->monc); 654 err = ceph_monc_open_session(&client->monc);
@@ -635,16 +656,15 @@ int __ceph_open_session(struct ceph_client *client, unsigned long started)
635 return err; 656 return err;
636 657
637 while (!have_mon_and_osd_map(client)) { 658 while (!have_mon_and_osd_map(client)) {
638 err = -EIO;
639 if (timeout && time_after_eq(jiffies, started + timeout)) 659 if (timeout && time_after_eq(jiffies, started + timeout))
640 return err; 660 return -ETIMEDOUT;
641 661
642 /* wait */ 662 /* wait */
643 dout("mount waiting for mon_map\n"); 663 dout("mount waiting for mon_map\n");
644 err = wait_event_interruptible_timeout(client->auth_wq, 664 err = wait_event_interruptible_timeout(client->auth_wq,
645 have_mon_and_osd_map(client) || (client->auth_err < 0), 665 have_mon_and_osd_map(client) || (client->auth_err < 0),
646 timeout); 666 ceph_timeout_jiffies(timeout));
647 if (err == -EINTR || err == -ERESTARTSYS) 667 if (err < 0)
648 return err; 668 return err;
649 if (client->auth_err < 0) 669 if (client->auth_err < 0)
650 return client->auth_err; 670 return client->auth_err;
@@ -721,5 +741,5 @@ module_exit(exit_ceph_lib);
721MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); 741MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
722MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); 742MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
723MODULE_AUTHOR("Patience Warnick <patience@newdream.net>"); 743MODULE_AUTHOR("Patience Warnick <patience@newdream.net>");
724MODULE_DESCRIPTION("Ceph filesystem for Linux"); 744MODULE_DESCRIPTION("Ceph core library");
725MODULE_LICENSE("GPL"); 745MODULE_LICENSE("GPL");
diff --git a/net/ceph/crush/crush.c b/net/ceph/crush/crush.c
index 9d84ce4ea0df..80d7c3a97cb8 100644
--- a/net/ceph/crush/crush.c
+++ b/net/ceph/crush/crush.c
@@ -1,15 +1,11 @@
1
2#ifdef __KERNEL__ 1#ifdef __KERNEL__
3# include <linux/slab.h> 2# include <linux/slab.h>
3# include <linux/crush/crush.h>
4#else 4#else
5# include <stdlib.h> 5# include "crush_compat.h"
6# include <assert.h> 6# include "crush.h"
7# define kfree(x) do { if (x) free(x); } while (0)
8# define BUG_ON(x) assert(!(x))
9#endif 7#endif
10 8
11#include <linux/crush/crush.h>
12
13const char *crush_bucket_alg_name(int alg) 9const char *crush_bucket_alg_name(int alg)
14{ 10{
15 switch (alg) { 11 switch (alg) {
@@ -134,6 +130,9 @@ void crush_destroy(struct crush_map *map)
134 kfree(map->rules); 130 kfree(map->rules);
135 } 131 }
136 132
133#ifndef __KERNEL__
134 kfree(map->choose_tries);
135#endif
137 kfree(map); 136 kfree(map);
138} 137}
139 138
diff --git a/net/ceph/crush/crush_ln_table.h b/net/ceph/crush/crush_ln_table.h
index 6192c7fc958c..aae534c901a4 100644
--- a/net/ceph/crush/crush_ln_table.h
+++ b/net/ceph/crush/crush_ln_table.h
@@ -10,20 +10,20 @@
10 * 10 *
11 */ 11 */
12 12
13#if defined(__linux__)
14#include <linux/types.h>
15#elif defined(__FreeBSD__)
16#include <sys/types.h>
17#endif
18
19#ifndef CEPH_CRUSH_LN_H 13#ifndef CEPH_CRUSH_LN_H
20#define CEPH_CRUSH_LN_H 14#define CEPH_CRUSH_LN_H
21 15
16#ifdef __KERNEL__
17# include <linux/types.h>
18#else
19# include "crush_compat.h"
20#endif
22 21
23// RH_LH_tbl[2*k] = 2^48/(1.0+k/128.0) 22/*
24// RH_LH_tbl[2*k+1] = 2^48*log2(1.0+k/128.0) 23 * RH_LH_tbl[2*k] = 2^48/(1.0+k/128.0)
25 24 * RH_LH_tbl[2*k+1] = 2^48*log2(1.0+k/128.0)
26static int64_t __RH_LH_tbl[128*2+2] = { 25 */
26static __s64 __RH_LH_tbl[128*2+2] = {
27 0x0001000000000000ll, 0x0000000000000000ll, 0x0000fe03f80fe040ll, 0x000002dfca16dde1ll, 27 0x0001000000000000ll, 0x0000000000000000ll, 0x0000fe03f80fe040ll, 0x000002dfca16dde1ll,
28 0x0000fc0fc0fc0fc1ll, 0x000005b9e5a170b4ll, 0x0000fa232cf25214ll, 0x0000088e68ea899all, 28 0x0000fc0fc0fc0fc1ll, 0x000005b9e5a170b4ll, 0x0000fa232cf25214ll, 0x0000088e68ea899all,
29 0x0000f83e0f83e0f9ll, 0x00000b5d69bac77ell, 0x0000f6603d980f67ll, 0x00000e26fd5c8555ll, 29 0x0000f83e0f83e0f9ll, 0x00000b5d69bac77ell, 0x0000f6603d980f67ll, 0x00000e26fd5c8555ll,
@@ -89,11 +89,12 @@ static int64_t __RH_LH_tbl[128*2+2] = {
89 0x0000820820820821ll, 0x0000fa2f045e7832ll, 0x000081848da8faf1ll, 0x0000fba577877d7dll, 89 0x0000820820820821ll, 0x0000fa2f045e7832ll, 0x000081848da8faf1ll, 0x0000fba577877d7dll,
90 0x0000810204081021ll, 0x0000fd1a708bbe11ll, 0x0000808080808081ll, 0x0000fe8df263f957ll, 90 0x0000810204081021ll, 0x0000fd1a708bbe11ll, 0x0000808080808081ll, 0x0000fe8df263f957ll,
91 0x0000800000000000ll, 0x0000ffff00000000ll, 91 0x0000800000000000ll, 0x0000ffff00000000ll,
92 }; 92};
93
94 93
95 // LL_tbl[k] = 2^48*log2(1.0+k/2^15); 94/*
96static int64_t __LL_tbl[256] = { 95 * LL_tbl[k] = 2^48*log2(1.0+k/2^15)
96 */
97static __s64 __LL_tbl[256] = {
97 0x0000000000000000ull, 0x00000002e2a60a00ull, 0x000000070cb64ec5ull, 0x00000009ef50ce67ull, 98 0x0000000000000000ull, 0x00000002e2a60a00ull, 0x000000070cb64ec5ull, 0x00000009ef50ce67ull,
98 0x0000000cd1e588fdull, 0x0000000fb4747e9cull, 0x0000001296fdaf5eull, 0x0000001579811b58ull, 99 0x0000000cd1e588fdull, 0x0000000fb4747e9cull, 0x0000001296fdaf5eull, 0x0000001579811b58ull,
99 0x000000185bfec2a1ull, 0x0000001b3e76a552ull, 0x0000001e20e8c380ull, 0x0000002103551d43ull, 100 0x000000185bfec2a1ull, 0x0000001b3e76a552ull, 0x0000001e20e8c380ull, 0x0000002103551d43ull,
@@ -160,7 +161,4 @@ static int64_t __LL_tbl[256] = {
160 0x000002d4562d2ec6ull, 0x000002d73330209dull, 0x000002da102d63b0ull, 0x000002dced24f814ull, 161 0x000002d4562d2ec6ull, 0x000002d73330209dull, 0x000002da102d63b0ull, 0x000002dced24f814ull,
161}; 162};
162 163
163
164
165
166#endif 164#endif
diff --git a/net/ceph/crush/hash.c b/net/ceph/crush/hash.c
index 5bb63e37a8a1..ed123af49eba 100644
--- a/net/ceph/crush/hash.c
+++ b/net/ceph/crush/hash.c
@@ -1,6 +1,8 @@
1 1#ifdef __KERNEL__
2#include <linux/types.h> 2# include <linux/crush/hash.h>
3#include <linux/crush/hash.h> 3#else
4# include "hash.h"
5#endif
4 6
5/* 7/*
6 * Robert Jenkins' function for mixing 32-bit values 8 * Robert Jenkins' function for mixing 32-bit values
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index 5b47736d27d9..393bfb22d5bb 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -1,27 +1,31 @@
1/*
2 * Ceph - scalable distributed file system
3 *
4 * Copyright (C) 2015 Intel Corporation All Rights Reserved
5 *
6 * This is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License version 2.1, as published by the Free Software
9 * Foundation. See file COPYING.
10 *
11 */
1 12
2#ifdef __KERNEL__ 13#ifdef __KERNEL__
3# include <linux/string.h> 14# include <linux/string.h>
4# include <linux/slab.h> 15# include <linux/slab.h>
5# include <linux/bug.h> 16# include <linux/bug.h>
6# include <linux/kernel.h> 17# include <linux/kernel.h>
7# ifndef dprintk 18# include <linux/crush/crush.h>
8# define dprintk(args...) 19# include <linux/crush/hash.h>
9# endif
10#else 20#else
11# include <string.h> 21# include "crush_compat.h"
12# include <stdio.h> 22# include "crush.h"
13# include <stdlib.h> 23# include "hash.h"
14# include <assert.h>
15# define BUG_ON(x) assert(!(x))
16# define dprintk(args...) /* printf(args) */
17# define kmalloc(x, f) malloc(x)
18# define kfree(x) free(x)
19#endif 24#endif
20
21#include <linux/crush/crush.h>
22#include <linux/crush/hash.h>
23#include "crush_ln_table.h" 25#include "crush_ln_table.h"
24 26
27#define dprintk(args...) /* printf(args) */
28
25/* 29/*
26 * Implement the core CRUSH mapping algorithm. 30 * Implement the core CRUSH mapping algorithm.
27 */ 31 */
@@ -139,7 +143,7 @@ static int bucket_list_choose(struct crush_bucket_list *bucket,
139 int i; 143 int i;
140 144
141 for (i = bucket->h.size-1; i >= 0; i--) { 145 for (i = bucket->h.size-1; i >= 0; i--) {
142 __u64 w = crush_hash32_4(bucket->h.hash,x, bucket->h.items[i], 146 __u64 w = crush_hash32_4(bucket->h.hash, x, bucket->h.items[i],
143 r, bucket->h.id); 147 r, bucket->h.id);
144 w &= 0xffff; 148 w &= 0xffff;
145 dprintk("list_choose i=%d x=%d r=%d item %d weight %x " 149 dprintk("list_choose i=%d x=%d r=%d item %d weight %x "
@@ -238,43 +242,46 @@ static int bucket_straw_choose(struct crush_bucket_straw *bucket,
238 return bucket->h.items[high]; 242 return bucket->h.items[high];
239} 243}
240 244
241// compute 2^44*log2(input+1) 245/* compute 2^44*log2(input+1) */
242uint64_t crush_ln(unsigned xin) 246static __u64 crush_ln(unsigned int xin)
243{ 247{
244 unsigned x=xin, x1; 248 unsigned int x = xin, x1;
245 int iexpon, index1, index2; 249 int iexpon, index1, index2;
246 uint64_t RH, LH, LL, xl64, result; 250 __u64 RH, LH, LL, xl64, result;
247 251
248 x++; 252 x++;
249 253
250 // normalize input 254 /* normalize input */
251 iexpon = 15; 255 iexpon = 15;
252 while(!(x&0x18000)) { x<<=1; iexpon--; } 256 while (!(x & 0x18000)) {
257 x <<= 1;
258 iexpon--;
259 }
253 260
254 index1 = (x>>8)<<1; 261 index1 = (x >> 8) << 1;
255 // RH ~ 2^56/index1 262 /* RH ~ 2^56/index1 */
256 RH = __RH_LH_tbl[index1 - 256]; 263 RH = __RH_LH_tbl[index1 - 256];
257 // LH ~ 2^48 * log2(index1/256) 264 /* LH ~ 2^48 * log2(index1/256) */
258 LH = __RH_LH_tbl[index1 + 1 - 256]; 265 LH = __RH_LH_tbl[index1 + 1 - 256];
259 266
260 // RH*x ~ 2^48 * (2^15 + xf), xf<2^8 267 /* RH*x ~ 2^48 * (2^15 + xf), xf<2^8 */
261 xl64 = (int64_t)x * RH; 268 xl64 = (__s64)x * RH;
262 xl64 >>= 48; 269 xl64 >>= 48;
263 x1 = xl64; 270 x1 = xl64;
264 271
265 result = iexpon; 272 result = iexpon;
266 result <<= (12 + 32); 273 result <<= (12 + 32);
267 274
268 index2 = x1 & 0xff; 275 index2 = x1 & 0xff;
269 // LL ~ 2^48*log2(1.0+index2/2^15) 276 /* LL ~ 2^48*log2(1.0+index2/2^15) */
270 LL = __LL_tbl[index2]; 277 LL = __LL_tbl[index2];
271 278
272 LH = LH + LL; 279 LH = LH + LL;
273 280
274 LH >>= (48-12 - 32); 281 LH >>= (48 - 12 - 32);
275 result += LH; 282 result += LH;
276 283
277 return result; 284 return result;
278} 285}
279 286
280 287
@@ -290,9 +297,9 @@ uint64_t crush_ln(unsigned xin)
290static int bucket_straw2_choose(struct crush_bucket_straw2 *bucket, 297static int bucket_straw2_choose(struct crush_bucket_straw2 *bucket,
291 int x, int r) 298 int x, int r)
292{ 299{
293 unsigned i, high = 0; 300 unsigned int i, high = 0;
294 unsigned u; 301 unsigned int u;
295 unsigned w; 302 unsigned int w;
296 __s64 ln, draw, high_draw = 0; 303 __s64 ln, draw, high_draw = 0;
297 304
298 for (i = 0; i < bucket->h.size; i++) { 305 for (i = 0; i < bucket->h.size; i++) {
@@ -567,6 +574,10 @@ reject:
567 out[outpos] = item; 574 out[outpos] = item;
568 outpos++; 575 outpos++;
569 count--; 576 count--;
577#ifndef __KERNEL__
578 if (map->choose_tries && ftotal <= map->choose_total_tries)
579 map->choose_tries[ftotal]++;
580#endif
570 } 581 }
571 582
572 dprintk("CHOOSE returns %d\n", outpos); 583 dprintk("CHOOSE returns %d\n", outpos);
@@ -610,6 +621,20 @@ static void crush_choose_indep(const struct crush_map *map,
610 } 621 }
611 622
612 for (ftotal = 0; left > 0 && ftotal < tries; ftotal++) { 623 for (ftotal = 0; left > 0 && ftotal < tries; ftotal++) {
624#ifdef DEBUG_INDEP
625 if (out2 && ftotal) {
626 dprintk("%u %d a: ", ftotal, left);
627 for (rep = outpos; rep < endpos; rep++) {
628 dprintk(" %d", out[rep]);
629 }
630 dprintk("\n");
631 dprintk("%u %d b: ", ftotal, left);
632 for (rep = outpos; rep < endpos; rep++) {
633 dprintk(" %d", out2[rep]);
634 }
635 dprintk("\n");
636 }
637#endif
613 for (rep = outpos; rep < endpos; rep++) { 638 for (rep = outpos; rep < endpos; rep++) {
614 if (out[rep] != CRUSH_ITEM_UNDEF) 639 if (out[rep] != CRUSH_ITEM_UNDEF)
615 continue; 640 continue;
@@ -726,6 +751,24 @@ static void crush_choose_indep(const struct crush_map *map,
726 out2[rep] = CRUSH_ITEM_NONE; 751 out2[rep] = CRUSH_ITEM_NONE;
727 } 752 }
728 } 753 }
754#ifndef __KERNEL__
755 if (map->choose_tries && ftotal <= map->choose_total_tries)
756 map->choose_tries[ftotal]++;
757#endif
758#ifdef DEBUG_INDEP
759 if (out2) {
760 dprintk("%u %d a: ", ftotal, left);
761 for (rep = outpos; rep < endpos; rep++) {
762 dprintk(" %d", out[rep]);
763 }
764 dprintk("\n");
765 dprintk("%u %d b: ", ftotal, left);
766 for (rep = outpos; rep < endpos; rep++) {
767 dprintk(" %d", out2[rep]);
768 }
769 dprintk("\n");
770 }
771#endif
729} 772}
730 773
731/** 774/**
@@ -790,8 +833,15 @@ int crush_do_rule(const struct crush_map *map,
790 833
791 switch (curstep->op) { 834 switch (curstep->op) {
792 case CRUSH_RULE_TAKE: 835 case CRUSH_RULE_TAKE:
793 w[0] = curstep->arg1; 836 if ((curstep->arg1 >= 0 &&
794 wsize = 1; 837 curstep->arg1 < map->max_devices) ||
838 (-1-curstep->arg1 < map->max_buckets &&
839 map->buckets[-1-curstep->arg1])) {
840 w[0] = curstep->arg1;
841 wsize = 1;
842 } else {
843 dprintk(" bad take value %d\n", curstep->arg1);
844 }
795 break; 845 break;
796 846
797 case CRUSH_RULE_SET_CHOOSE_TRIES: 847 case CRUSH_RULE_SET_CHOOSE_TRIES:
@@ -877,7 +927,7 @@ int crush_do_rule(const struct crush_map *map,
877 0); 927 0);
878 } else { 928 } else {
879 out_size = ((numrep < (result_max-osize)) ? 929 out_size = ((numrep < (result_max-osize)) ?
880 numrep : (result_max-osize)); 930 numrep : (result_max-osize));
881 crush_choose_indep( 931 crush_choose_indep(
882 map, 932 map,
883 map->buckets[-1-w[i]], 933 map->buckets[-1-w[i]],
@@ -923,5 +973,3 @@ int crush_do_rule(const struct crush_map *map,
923 } 973 }
924 return result_len; 974 return result_len;
925} 975}
926
927
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 073262fea6dd..1679f47280e2 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -278,7 +278,6 @@ static void _ceph_msgr_exit(void)
278 ceph_msgr_slab_exit(); 278 ceph_msgr_slab_exit();
279 279
280 BUG_ON(zero_page == NULL); 280 BUG_ON(zero_page == NULL);
281 kunmap(zero_page);
282 page_cache_release(zero_page); 281 page_cache_release(zero_page);
283 zero_page = NULL; 282 zero_page = NULL;
284} 283}
@@ -1545,7 +1544,7 @@ static int write_partial_message_data(struct ceph_connection *con)
1545 page = ceph_msg_data_next(&msg->cursor, &page_offset, &length, 1544 page = ceph_msg_data_next(&msg->cursor, &page_offset, &length,
1546 &last_piece); 1545 &last_piece);
1547 ret = ceph_tcp_sendpage(con->sock, page, page_offset, 1546 ret = ceph_tcp_sendpage(con->sock, page, page_offset,
1548 length, last_piece); 1547 length, !last_piece);
1549 if (ret <= 0) { 1548 if (ret <= 0) {
1550 if (do_datacrc) 1549 if (do_datacrc)
1551 msg->footer.data_crc = cpu_to_le32(crc); 1550 msg->footer.data_crc = cpu_to_le32(crc);
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 2b3cf05e87b0..9d6ff1215928 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -298,21 +298,28 @@ void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc)
298} 298}
299EXPORT_SYMBOL(ceph_monc_request_next_osdmap); 299EXPORT_SYMBOL(ceph_monc_request_next_osdmap);
300 300
301/*
302 * Wait for an osdmap with a given epoch.
303 *
304 * @epoch: epoch to wait for
305 * @timeout: in jiffies, 0 means "wait forever"
306 */
301int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch, 307int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch,
302 unsigned long timeout) 308 unsigned long timeout)
303{ 309{
304 unsigned long started = jiffies; 310 unsigned long started = jiffies;
305 int ret; 311 long ret;
306 312
307 mutex_lock(&monc->mutex); 313 mutex_lock(&monc->mutex);
308 while (monc->have_osdmap < epoch) { 314 while (monc->have_osdmap < epoch) {
309 mutex_unlock(&monc->mutex); 315 mutex_unlock(&monc->mutex);
310 316
311 if (timeout != 0 && time_after_eq(jiffies, started + timeout)) 317 if (timeout && time_after_eq(jiffies, started + timeout))
312 return -ETIMEDOUT; 318 return -ETIMEDOUT;
313 319
314 ret = wait_event_interruptible_timeout(monc->client->auth_wq, 320 ret = wait_event_interruptible_timeout(monc->client->auth_wq,
315 monc->have_osdmap >= epoch, timeout); 321 monc->have_osdmap >= epoch,
322 ceph_timeout_jiffies(timeout));
316 if (ret < 0) 323 if (ret < 0)
317 return ret; 324 return ret;
318 325
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index c4ec9239249a..50033677c0fa 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -296,6 +296,9 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req,
296 case CEPH_OSD_OP_CMPXATTR: 296 case CEPH_OSD_OP_CMPXATTR:
297 ceph_osd_data_release(&op->xattr.osd_data); 297 ceph_osd_data_release(&op->xattr.osd_data);
298 break; 298 break;
299 case CEPH_OSD_OP_STAT:
300 ceph_osd_data_release(&op->raw_data_in);
301 break;
299 default: 302 default:
300 break; 303 break;
301 } 304 }
@@ -450,7 +453,7 @@ __CEPH_FORALL_OSD_OPS(GENERATE_CASE)
450 */ 453 */
451static struct ceph_osd_req_op * 454static struct ceph_osd_req_op *
452_osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which, 455_osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which,
453 u16 opcode) 456 u16 opcode, u32 flags)
454{ 457{
455 struct ceph_osd_req_op *op; 458 struct ceph_osd_req_op *op;
456 459
@@ -460,14 +463,15 @@ _osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which,
460 op = &osd_req->r_ops[which]; 463 op = &osd_req->r_ops[which];
461 memset(op, 0, sizeof (*op)); 464 memset(op, 0, sizeof (*op));
462 op->op = opcode; 465 op->op = opcode;
466 op->flags = flags;
463 467
464 return op; 468 return op;
465} 469}
466 470
467void osd_req_op_init(struct ceph_osd_request *osd_req, 471void osd_req_op_init(struct ceph_osd_request *osd_req,
468 unsigned int which, u16 opcode) 472 unsigned int which, u16 opcode, u32 flags)
469{ 473{
470 (void)_osd_req_op_init(osd_req, which, opcode); 474 (void)_osd_req_op_init(osd_req, which, opcode, flags);
471} 475}
472EXPORT_SYMBOL(osd_req_op_init); 476EXPORT_SYMBOL(osd_req_op_init);
473 477
@@ -476,7 +480,8 @@ void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
476 u64 offset, u64 length, 480 u64 offset, u64 length,
477 u64 truncate_size, u32 truncate_seq) 481 u64 truncate_size, u32 truncate_seq)
478{ 482{
479 struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode); 483 struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which,
484 opcode, 0);
480 size_t payload_len = 0; 485 size_t payload_len = 0;
481 486
482 BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE && 487 BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE &&
@@ -515,7 +520,8 @@ EXPORT_SYMBOL(osd_req_op_extent_update);
515void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which, 520void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
516 u16 opcode, const char *class, const char *method) 521 u16 opcode, const char *class, const char *method)
517{ 522{
518 struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode); 523 struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which,
524 opcode, 0);
519 struct ceph_pagelist *pagelist; 525 struct ceph_pagelist *pagelist;
520 size_t payload_len = 0; 526 size_t payload_len = 0;
521 size_t size; 527 size_t size;
@@ -552,7 +558,8 @@ int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which,
552 u16 opcode, const char *name, const void *value, 558 u16 opcode, const char *name, const void *value,
553 size_t size, u8 cmp_op, u8 cmp_mode) 559 size_t size, u8 cmp_op, u8 cmp_mode)
554{ 560{
555 struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode); 561 struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which,
562 opcode, 0);
556 struct ceph_pagelist *pagelist; 563 struct ceph_pagelist *pagelist;
557 size_t payload_len; 564 size_t payload_len;
558 565
@@ -585,7 +592,8 @@ void osd_req_op_watch_init(struct ceph_osd_request *osd_req,
585 unsigned int which, u16 opcode, 592 unsigned int which, u16 opcode,
586 u64 cookie, u64 version, int flag) 593 u64 cookie, u64 version, int flag)
587{ 594{
588 struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode); 595 struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which,
596 opcode, 0);
589 597
590 BUG_ON(opcode != CEPH_OSD_OP_NOTIFY_ACK && opcode != CEPH_OSD_OP_WATCH); 598 BUG_ON(opcode != CEPH_OSD_OP_NOTIFY_ACK && opcode != CEPH_OSD_OP_WATCH);
591 599
@@ -602,7 +610,8 @@ void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req,
602 u64 expected_write_size) 610 u64 expected_write_size)
603{ 611{
604 struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, 612 struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which,
605 CEPH_OSD_OP_SETALLOCHINT); 613 CEPH_OSD_OP_SETALLOCHINT,
614 0);
606 615
607 op->alloc_hint.expected_object_size = expected_object_size; 616 op->alloc_hint.expected_object_size = expected_object_size;
608 op->alloc_hint.expected_write_size = expected_write_size; 617 op->alloc_hint.expected_write_size = expected_write_size;
@@ -786,7 +795,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
786 } 795 }
787 796
788 if (opcode == CEPH_OSD_OP_CREATE || opcode == CEPH_OSD_OP_DELETE) { 797 if (opcode == CEPH_OSD_OP_CREATE || opcode == CEPH_OSD_OP_DELETE) {
789 osd_req_op_init(req, which, opcode); 798 osd_req_op_init(req, which, opcode, 0);
790 } else { 799 } else {
791 u32 object_size = le32_to_cpu(layout->fl_object_size); 800 u32 object_size = le32_to_cpu(layout->fl_object_size);
792 u32 object_base = off - objoff; 801 u32 object_base = off - objoff;
@@ -1088,7 +1097,7 @@ static void __move_osd_to_lru(struct ceph_osd_client *osdc,
1088 BUG_ON(!list_empty(&osd->o_osd_lru)); 1097 BUG_ON(!list_empty(&osd->o_osd_lru));
1089 1098
1090 list_add_tail(&osd->o_osd_lru, &osdc->osd_lru); 1099 list_add_tail(&osd->o_osd_lru, &osdc->osd_lru);
1091 osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl * HZ; 1100 osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl;
1092} 1101}
1093 1102
1094static void maybe_move_osd_to_lru(struct ceph_osd_client *osdc, 1103static void maybe_move_osd_to_lru(struct ceph_osd_client *osdc,
@@ -1199,7 +1208,7 @@ static struct ceph_osd *__lookup_osd(struct ceph_osd_client *osdc, int o)
1199static void __schedule_osd_timeout(struct ceph_osd_client *osdc) 1208static void __schedule_osd_timeout(struct ceph_osd_client *osdc)
1200{ 1209{
1201 schedule_delayed_work(&osdc->timeout_work, 1210 schedule_delayed_work(&osdc->timeout_work,
1202 osdc->client->options->osd_keepalive_timeout * HZ); 1211 osdc->client->options->osd_keepalive_timeout);
1203} 1212}
1204 1213
1205static void __cancel_osd_timeout(struct ceph_osd_client *osdc) 1214static void __cancel_osd_timeout(struct ceph_osd_client *osdc)
@@ -1567,10 +1576,9 @@ static void handle_timeout(struct work_struct *work)
1567{ 1576{
1568 struct ceph_osd_client *osdc = 1577 struct ceph_osd_client *osdc =
1569 container_of(work, struct ceph_osd_client, timeout_work.work); 1578 container_of(work, struct ceph_osd_client, timeout_work.work);
1579 struct ceph_options *opts = osdc->client->options;
1570 struct ceph_osd_request *req; 1580 struct ceph_osd_request *req;
1571 struct ceph_osd *osd; 1581 struct ceph_osd *osd;
1572 unsigned long keepalive =
1573 osdc->client->options->osd_keepalive_timeout * HZ;
1574 struct list_head slow_osds; 1582 struct list_head slow_osds;
1575 dout("timeout\n"); 1583 dout("timeout\n");
1576 down_read(&osdc->map_sem); 1584 down_read(&osdc->map_sem);
@@ -1586,7 +1594,8 @@ static void handle_timeout(struct work_struct *work)
1586 */ 1594 */
1587 INIT_LIST_HEAD(&slow_osds); 1595 INIT_LIST_HEAD(&slow_osds);
1588 list_for_each_entry(req, &osdc->req_lru, r_req_lru_item) { 1596 list_for_each_entry(req, &osdc->req_lru, r_req_lru_item) {
1589 if (time_before(jiffies, req->r_stamp + keepalive)) 1597 if (time_before(jiffies,
1598 req->r_stamp + opts->osd_keepalive_timeout))
1590 break; 1599 break;
1591 1600
1592 osd = req->r_osd; 1601 osd = req->r_osd;
@@ -1613,8 +1622,7 @@ static void handle_osds_timeout(struct work_struct *work)
1613 struct ceph_osd_client *osdc = 1622 struct ceph_osd_client *osdc =
1614 container_of(work, struct ceph_osd_client, 1623 container_of(work, struct ceph_osd_client,
1615 osds_timeout_work.work); 1624 osds_timeout_work.work);
1616 unsigned long delay = 1625 unsigned long delay = osdc->client->options->osd_idle_ttl / 4;
1617 osdc->client->options->osd_idle_ttl * HZ >> 2;
1618 1626
1619 dout("osds timeout\n"); 1627 dout("osds timeout\n");
1620 down_read(&osdc->map_sem); 1628 down_read(&osdc->map_sem);
@@ -2619,7 +2627,7 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
2619 osdc->event_count = 0; 2627 osdc->event_count = 0;
2620 2628
2621 schedule_delayed_work(&osdc->osds_timeout_work, 2629 schedule_delayed_work(&osdc->osds_timeout_work,
2622 round_jiffies_relative(osdc->client->options->osd_idle_ttl * HZ)); 2630 round_jiffies_relative(osdc->client->options->osd_idle_ttl));
2623 2631
2624 err = -ENOMEM; 2632 err = -ENOMEM;
2625 osdc->req_mempool = mempool_create_kmalloc_pool(10, 2633 osdc->req_mempool = mempool_create_kmalloc_pool(10,
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 15796696d64e..4a3125836b64 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -89,7 +89,7 @@ static int crush_decode_tree_bucket(void **p, void *end,
89{ 89{
90 int j; 90 int j;
91 dout("crush_decode_tree_bucket %p to %p\n", *p, end); 91 dout("crush_decode_tree_bucket %p to %p\n", *p, end);
92 ceph_decode_32_safe(p, end, b->num_nodes, bad); 92 ceph_decode_8_safe(p, end, b->num_nodes, bad);
93 b->node_weights = kcalloc(b->num_nodes, sizeof(u32), GFP_NOFS); 93 b->node_weights = kcalloc(b->num_nodes, sizeof(u32), GFP_NOFS);
94 if (b->node_weights == NULL) 94 if (b->node_weights == NULL)
95 return -ENOMEM; 95 return -ENOMEM;
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index 096d91447e06..d4f5f220a8e5 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -51,10 +51,7 @@ void ceph_put_page_vector(struct page **pages, int num_pages, bool dirty)
51 set_page_dirty_lock(pages[i]); 51 set_page_dirty_lock(pages[i]);
52 put_page(pages[i]); 52 put_page(pages[i]);
53 } 53 }
54 if (is_vmalloc_addr(pages)) 54 kvfree(pages);
55 vfree(pages);
56 else
57 kfree(pages);
58} 55}
59EXPORT_SYMBOL(ceph_put_page_vector); 56EXPORT_SYMBOL(ceph_put_page_vector);
60 57