aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-01-28 14:02:23 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2014-01-28 14:02:23 -0500
commitd891ea23d5203e5c47439b2a174f86a00b356a6c (patch)
tree3876cefcced9df5519f437cd8eb275cb979b93f6 /include
parent08d21b5f93eb92a781daea71b6fcb3a340909141 (diff)
parent125d725c923527a85876c031028c7f55c28b74b3 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
Pull ceph updates from Sage Weil: "This is a big batch. From Ilya we have: - rbd support for more than ~250 mapped devices (now uses same scheme that SCSI does for device major/minor numbering) - crush updates for new mapping behaviors (will be needed for coming erasure coding support, among other things) - preliminary support for tiered storage pools There is also a big series fixing a pile of cephfs bugs with clustered MDSs from Yan Zheng, ACL support for cephfs from Guangliang Zhao, ceph fscache improvements from Li Wang, improved behavior when we get ENOSPC from Josh Durgin, some readv/writev improvements from Majianpeng, and the usual mix of small cleanups" * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (76 commits) ceph: cast PAGE_SIZE to size_t in ceph_sync_write() ceph: fix dout() compile warnings in ceph_filemap_fault() libceph: support CEPH_FEATURE_OSD_CACHEPOOL feature libceph: follow redirect replies from osds libceph: rename ceph_osd_request::r_{oloc,oid} to r_base_{oloc,oid} libceph: follow {read,write}_tier fields on osd request submission libceph: add ceph_pg_pool_by_id() libceph: CEPH_OSD_FLAG_* enum update libceph: replace ceph_calc_ceph_pg() with ceph_oloc_oid_to_pg() libceph: introduce and start using oid abstraction libceph: rename MAX_OBJ_NAME_SIZE to CEPH_MAX_OID_NAME_LEN libceph: move ceph_file_layout helpers to ceph_fs.h libceph: start using oloc abstraction libceph: dout() is missing a newline libceph: add ceph_kv{malloc,free}() and switch to them libceph: support CEPH_FEATURE_EXPORT_PEER ceph: add imported caps when handling cap export message ceph: add open export target session helper ceph: remove exported caps when handling cap import message ceph: handle session flush message ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/ceph/buffer.h1
-rw-r--r--include/linux/ceph/ceph_features.h101
-rw-r--r--include/linux/ceph/ceph_fs.h36
-rw-r--r--include/linux/ceph/libceph.h19
-rw-r--r--include/linux/ceph/messenger.h13
-rw-r--r--include/linux/ceph/osd_client.h19
-rw-r--r--include/linux/ceph/osdmap.h66
-rw-r--r--include/linux/ceph/rados.h4
-rw-r--r--include/linux/crush/crush.h20
-rw-r--r--include/linux/crush/mapper.h3
10 files changed, 192 insertions, 90 deletions
diff --git a/include/linux/ceph/buffer.h b/include/linux/ceph/buffer.h
index 58d19014068f..07ad423cc37f 100644
--- a/include/linux/ceph/buffer.h
+++ b/include/linux/ceph/buffer.h
@@ -17,7 +17,6 @@ struct ceph_buffer {
17 struct kref kref; 17 struct kref kref;
18 struct kvec vec; 18 struct kvec vec;
19 size_t alloc_len; 19 size_t alloc_len;
20 bool is_vmalloc;
21}; 20};
22 21
23extern struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp); 22extern struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp);
diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index 4c42080347af..138448f766b4 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -4,42 +4,73 @@
4/* 4/*
5 * feature bits 5 * feature bits
6 */ 6 */
7#define CEPH_FEATURE_UID (1<<0) 7#define CEPH_FEATURE_UID (1ULL<<0)
8#define CEPH_FEATURE_NOSRCADDR (1<<1) 8#define CEPH_FEATURE_NOSRCADDR (1ULL<<1)
9#define CEPH_FEATURE_MONCLOCKCHECK (1<<2) 9#define CEPH_FEATURE_MONCLOCKCHECK (1ULL<<2)
10#define CEPH_FEATURE_FLOCK (1<<3) 10#define CEPH_FEATURE_FLOCK (1ULL<<3)
11#define CEPH_FEATURE_SUBSCRIBE2 (1<<4) 11#define CEPH_FEATURE_SUBSCRIBE2 (1ULL<<4)
12#define CEPH_FEATURE_MONNAMES (1<<5) 12#define CEPH_FEATURE_MONNAMES (1ULL<<5)
13#define CEPH_FEATURE_RECONNECT_SEQ (1<<6) 13#define CEPH_FEATURE_RECONNECT_SEQ (1ULL<<6)
14#define CEPH_FEATURE_DIRLAYOUTHASH (1<<7) 14#define CEPH_FEATURE_DIRLAYOUTHASH (1ULL<<7)
15#define CEPH_FEATURE_OBJECTLOCATOR (1<<8) 15#define CEPH_FEATURE_OBJECTLOCATOR (1ULL<<8)
16#define CEPH_FEATURE_PGID64 (1<<9) 16#define CEPH_FEATURE_PGID64 (1ULL<<9)
17#define CEPH_FEATURE_INCSUBOSDMAP (1<<10) 17#define CEPH_FEATURE_INCSUBOSDMAP (1ULL<<10)
18#define CEPH_FEATURE_PGPOOL3 (1<<11) 18#define CEPH_FEATURE_PGPOOL3 (1ULL<<11)
19#define CEPH_FEATURE_OSDREPLYMUX (1<<12) 19#define CEPH_FEATURE_OSDREPLYMUX (1ULL<<12)
20#define CEPH_FEATURE_OSDENC (1<<13) 20#define CEPH_FEATURE_OSDENC (1ULL<<13)
21#define CEPH_FEATURE_OMAP (1<<14) 21#define CEPH_FEATURE_OMAP (1ULL<<14)
22#define CEPH_FEATURE_MONENC (1<<15) 22#define CEPH_FEATURE_MONENC (1ULL<<15)
23#define CEPH_FEATURE_QUERY_T (1<<16) 23#define CEPH_FEATURE_QUERY_T (1ULL<<16)
24#define CEPH_FEATURE_INDEP_PG_MAP (1<<17) 24#define CEPH_FEATURE_INDEP_PG_MAP (1ULL<<17)
25#define CEPH_FEATURE_CRUSH_TUNABLES (1<<18) 25#define CEPH_FEATURE_CRUSH_TUNABLES (1ULL<<18)
26#define CEPH_FEATURE_CHUNKY_SCRUB (1<<19) 26#define CEPH_FEATURE_CHUNKY_SCRUB (1ULL<<19)
27#define CEPH_FEATURE_MON_NULLROUTE (1<<20) 27#define CEPH_FEATURE_MON_NULLROUTE (1ULL<<20)
28#define CEPH_FEATURE_MON_GV (1<<21) 28#define CEPH_FEATURE_MON_GV (1ULL<<21)
29#define CEPH_FEATURE_BACKFILL_RESERVATION (1<<22) 29#define CEPH_FEATURE_BACKFILL_RESERVATION (1ULL<<22)
30#define CEPH_FEATURE_MSG_AUTH (1<<23) 30#define CEPH_FEATURE_MSG_AUTH (1ULL<<23)
31#define CEPH_FEATURE_RECOVERY_RESERVATION (1<<24) 31#define CEPH_FEATURE_RECOVERY_RESERVATION (1ULL<<24)
32#define CEPH_FEATURE_CRUSH_TUNABLES2 (1<<25) 32#define CEPH_FEATURE_CRUSH_TUNABLES2 (1ULL<<25)
33#define CEPH_FEATURE_CREATEPOOLID (1<<26) 33#define CEPH_FEATURE_CREATEPOOLID (1ULL<<26)
34#define CEPH_FEATURE_REPLY_CREATE_INODE (1<<27) 34#define CEPH_FEATURE_REPLY_CREATE_INODE (1ULL<<27)
35#define CEPH_FEATURE_OSD_HBMSGS (1<<28) 35#define CEPH_FEATURE_OSD_HBMSGS (1ULL<<28)
36#define CEPH_FEATURE_MDSENC (1<<29) 36#define CEPH_FEATURE_MDSENC (1ULL<<29)
37#define CEPH_FEATURE_OSDHASHPSPOOL (1<<30) 37#define CEPH_FEATURE_OSDHASHPSPOOL (1ULL<<30)
38#define CEPH_FEATURE_MON_SINGLE_PAXOS (1ULL<<31)
39#define CEPH_FEATURE_OSD_SNAPMAPPER (1ULL<<32)
40#define CEPH_FEATURE_MON_SCRUB (1ULL<<33)
41#define CEPH_FEATURE_OSD_PACKED_RECOVERY (1ULL<<34)
42#define CEPH_FEATURE_OSD_CACHEPOOL (1ULL<<35)
43#define CEPH_FEATURE_CRUSH_V2 (1ULL<<36) /* new indep; SET_* steps */
44#define CEPH_FEATURE_EXPORT_PEER (1ULL<<37)
45#define CEPH_FEATURE_OSD_ERASURE_CODES (1ULL<<38)
46
47/*
48 * The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature
49 * vector to evaluate to 64 bit ~0. To cope, we designate 1ULL << 63
50 * to mean 33 bit ~0, and introduce a helper below to do the
51 * translation.
52 *
53 * This was introduced by ceph.git commit
54 * 9ea02b84104045c2ffd7e7f4e7af512953855ecd v0.58-657-g9ea02b8
55 * and fixed by ceph.git commit
56 * 4255b5c2fb54ae40c53284b3ab700fdfc7e61748 v0.65-263-g4255b5c
57 */
58#define CEPH_FEATURE_RESERVED (1ULL<<63)
59
60static inline u64 ceph_sanitize_features(u64 features)
61{
62 if (features & CEPH_FEATURE_RESERVED) {
63 /* everything through OSD_SNAPMAPPER */
64 return 0x1ffffffffull;
65 } else {
66 return features;
67 }
68}
38 69
39/* 70/*
40 * Features supported. 71 * Features supported.
41 */ 72 */
42#define CEPH_FEATURES_SUPPORTED_DEFAULT \ 73#define CEPH_FEATURES_SUPPORTED_DEFAULT \
43 (CEPH_FEATURE_NOSRCADDR | \ 74 (CEPH_FEATURE_NOSRCADDR | \
44 CEPH_FEATURE_RECONNECT_SEQ | \ 75 CEPH_FEATURE_RECONNECT_SEQ | \
45 CEPH_FEATURE_PGID64 | \ 76 CEPH_FEATURE_PGID64 | \
@@ -48,7 +79,10 @@
48 CEPH_FEATURE_CRUSH_TUNABLES | \ 79 CEPH_FEATURE_CRUSH_TUNABLES | \
49 CEPH_FEATURE_CRUSH_TUNABLES2 | \ 80 CEPH_FEATURE_CRUSH_TUNABLES2 | \
50 CEPH_FEATURE_REPLY_CREATE_INODE | \ 81 CEPH_FEATURE_REPLY_CREATE_INODE | \
51 CEPH_FEATURE_OSDHASHPSPOOL) 82 CEPH_FEATURE_OSDHASHPSPOOL | \
83 CEPH_FEATURE_OSD_CACHEPOOL | \
84 CEPH_FEATURE_CRUSH_V2 | \
85 CEPH_FEATURE_EXPORT_PEER)
52 86
53#define CEPH_FEATURES_REQUIRED_DEFAULT \ 87#define CEPH_FEATURES_REQUIRED_DEFAULT \
54 (CEPH_FEATURE_NOSRCADDR | \ 88 (CEPH_FEATURE_NOSRCADDR | \
@@ -56,4 +90,5 @@
56 CEPH_FEATURE_PGID64 | \ 90 CEPH_FEATURE_PGID64 | \
57 CEPH_FEATURE_PGPOOL3 | \ 91 CEPH_FEATURE_PGPOOL3 | \
58 CEPH_FEATURE_OSDENC) 92 CEPH_FEATURE_OSDENC)
93
59#endif 94#endif
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index 2ad7b860f062..2623cffc73a1 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -53,6 +53,29 @@ struct ceph_file_layout {
53 __le32 fl_pg_pool; /* namespace, crush ruleset, rep level */ 53 __le32 fl_pg_pool; /* namespace, crush ruleset, rep level */
54} __attribute__ ((packed)); 54} __attribute__ ((packed));
55 55
56#define ceph_file_layout_su(l) ((__s32)le32_to_cpu((l).fl_stripe_unit))
57#define ceph_file_layout_stripe_count(l) \
58 ((__s32)le32_to_cpu((l).fl_stripe_count))
59#define ceph_file_layout_object_size(l) ((__s32)le32_to_cpu((l).fl_object_size))
60#define ceph_file_layout_cas_hash(l) ((__s32)le32_to_cpu((l).fl_cas_hash))
61#define ceph_file_layout_object_su(l) \
62 ((__s32)le32_to_cpu((l).fl_object_stripe_unit))
63#define ceph_file_layout_pg_pool(l) \
64 ((__s32)le32_to_cpu((l).fl_pg_pool))
65
66static inline unsigned ceph_file_layout_stripe_width(struct ceph_file_layout *l)
67{
68 return le32_to_cpu(l->fl_stripe_unit) *
69 le32_to_cpu(l->fl_stripe_count);
70}
71
72/* "period" == bytes before i start on a new set of objects */
73static inline unsigned ceph_file_layout_period(struct ceph_file_layout *l)
74{
75 return le32_to_cpu(l->fl_object_size) *
76 le32_to_cpu(l->fl_stripe_count);
77}
78
56#define CEPH_MIN_STRIPE_UNIT 65536 79#define CEPH_MIN_STRIPE_UNIT 65536
57 80
58int ceph_file_layout_is_valid(const struct ceph_file_layout *layout); 81int ceph_file_layout_is_valid(const struct ceph_file_layout *layout);
@@ -282,6 +305,8 @@ enum {
282 CEPH_SESSION_RENEWCAPS, 305 CEPH_SESSION_RENEWCAPS,
283 CEPH_SESSION_STALE, 306 CEPH_SESSION_STALE,
284 CEPH_SESSION_RECALL_STATE, 307 CEPH_SESSION_RECALL_STATE,
308 CEPH_SESSION_FLUSHMSG,
309 CEPH_SESSION_FLUSHMSG_ACK,
285}; 310};
286 311
287extern const char *ceph_session_op_name(int op); 312extern const char *ceph_session_op_name(int op);
@@ -457,7 +482,8 @@ struct ceph_mds_reply_cap {
457 __u8 flags; /* CEPH_CAP_FLAG_* */ 482 __u8 flags; /* CEPH_CAP_FLAG_* */
458} __attribute__ ((packed)); 483} __attribute__ ((packed));
459 484
460#define CEPH_CAP_FLAG_AUTH 1 /* cap is issued by auth mds */ 485#define CEPH_CAP_FLAG_AUTH (1 << 0) /* cap is issued by auth mds */
486#define CEPH_CAP_FLAG_RELEASE (1 << 1) /* release the cap */
461 487
462/* inode record, for bundling with mds reply */ 488/* inode record, for bundling with mds reply */
463struct ceph_mds_reply_inode { 489struct ceph_mds_reply_inode {
@@ -658,6 +684,14 @@ struct ceph_mds_caps {
658 __le32 time_warp_seq; 684 __le32 time_warp_seq;
659} __attribute__ ((packed)); 685} __attribute__ ((packed));
660 686
687struct ceph_mds_cap_peer {
688 __le64 cap_id;
689 __le32 seq;
690 __le32 mseq;
691 __le32 mds;
692 __u8 flags;
693} __attribute__ ((packed));
694
661/* cap release msg head */ 695/* cap release msg head */
662struct ceph_mds_cap_release { 696struct ceph_mds_cap_release {
663 __le32 num; /* number of cap_items that follow */ 697 __le32 num; /* number of cap_items that follow */
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 2e3024881a5e..2f49aa4c4f7f 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -122,8 +122,8 @@ struct ceph_client {
122 122
123 int (*extra_mon_dispatch)(struct ceph_client *, struct ceph_msg *); 123 int (*extra_mon_dispatch)(struct ceph_client *, struct ceph_msg *);
124 124
125 u32 supported_features; 125 u64 supported_features;
126 u32 required_features; 126 u64 required_features;
127 127
128 struct ceph_messenger msgr; /* messenger instance */ 128 struct ceph_messenger msgr; /* messenger instance */
129 struct ceph_mon_client monc; 129 struct ceph_mon_client monc;
@@ -173,15 +173,18 @@ static inline int calc_pages_for(u64 off, u64 len)
173 (off >> PAGE_CACHE_SHIFT); 173 (off >> PAGE_CACHE_SHIFT);
174} 174}
175 175
176extern struct kmem_cache *ceph_inode_cachep;
177extern struct kmem_cache *ceph_cap_cachep;
178extern struct kmem_cache *ceph_dentry_cachep;
179extern struct kmem_cache *ceph_file_cachep;
180
176/* ceph_common.c */ 181/* ceph_common.c */
177extern bool libceph_compatible(void *data); 182extern bool libceph_compatible(void *data);
178 183
179extern const char *ceph_msg_type_name(int type); 184extern const char *ceph_msg_type_name(int type);
180extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); 185extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid);
181extern struct kmem_cache *ceph_inode_cachep; 186extern void *ceph_kvmalloc(size_t size, gfp_t flags);
182extern struct kmem_cache *ceph_cap_cachep; 187extern void ceph_kvfree(const void *ptr);
183extern struct kmem_cache *ceph_dentry_cachep;
184extern struct kmem_cache *ceph_file_cachep;
185 188
186extern struct ceph_options *ceph_parse_options(char *options, 189extern struct ceph_options *ceph_parse_options(char *options,
187 const char *dev_name, const char *dev_name_end, 190 const char *dev_name, const char *dev_name_end,
@@ -192,8 +195,8 @@ extern int ceph_compare_options(struct ceph_options *new_opt,
192 struct ceph_client *client); 195 struct ceph_client *client);
193extern struct ceph_client *ceph_create_client(struct ceph_options *opt, 196extern struct ceph_client *ceph_create_client(struct ceph_options *opt,
194 void *private, 197 void *private,
195 unsigned supported_features, 198 u64 supported_features,
196 unsigned required_features); 199 u64 required_features);
197extern u64 ceph_client_id(struct ceph_client *client); 200extern u64 ceph_client_id(struct ceph_client *client);
198extern void ceph_destroy_client(struct ceph_client *client); 201extern void ceph_destroy_client(struct ceph_client *client);
199extern int __ceph_open_session(struct ceph_client *client, 202extern int __ceph_open_session(struct ceph_client *client,
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index 7c1420bb1dce..20ee8b63a968 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -60,8 +60,8 @@ struct ceph_messenger {
60 u32 global_seq; 60 u32 global_seq;
61 spinlock_t global_seq_lock; 61 spinlock_t global_seq_lock;
62 62
63 u32 supported_features; 63 u64 supported_features;
64 u32 required_features; 64 u64 required_features;
65}; 65};
66 66
67enum ceph_msg_data_type { 67enum ceph_msg_data_type {
@@ -154,10 +154,9 @@ struct ceph_msg {
154 struct list_head list_head; /* links for connection lists */ 154 struct list_head list_head; /* links for connection lists */
155 155
156 struct kref kref; 156 struct kref kref;
157 bool front_is_vmalloc;
158 bool more_to_follow; 157 bool more_to_follow;
159 bool needs_out_seq; 158 bool needs_out_seq;
160 int front_max; 159 int front_alloc_len;
161 unsigned long ack_stamp; /* tx: when we were acked */ 160 unsigned long ack_stamp; /* tx: when we were acked */
162 161
163 struct ceph_msgpool *pool; 162 struct ceph_msgpool *pool;
@@ -192,7 +191,7 @@ struct ceph_connection {
192 191
193 struct ceph_entity_name peer_name; /* peer name */ 192 struct ceph_entity_name peer_name; /* peer name */
194 193
195 unsigned peer_features; 194 u64 peer_features;
196 u32 connect_seq; /* identify the most recent connection 195 u32 connect_seq; /* identify the most recent connection
197 attempt for this connection, client */ 196 attempt for this connection, client */
198 u32 peer_global_seq; /* peer's global seq for this connection */ 197 u32 peer_global_seq; /* peer's global seq for this connection */
@@ -256,8 +255,8 @@ extern void ceph_msgr_flush(void);
256 255
257extern void ceph_messenger_init(struct ceph_messenger *msgr, 256extern void ceph_messenger_init(struct ceph_messenger *msgr,
258 struct ceph_entity_addr *myaddr, 257 struct ceph_entity_addr *myaddr,
259 u32 supported_features, 258 u64 supported_features,
260 u32 required_features, 259 u64 required_features,
261 bool nocrc); 260 bool nocrc);
262 261
263extern void ceph_con_init(struct ceph_connection *con, void *private, 262extern void ceph_con_init(struct ceph_connection *con, void *private,
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 8f47625a0661..fd47e872ebcc 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -12,12 +12,6 @@
12#include <linux/ceph/auth.h> 12#include <linux/ceph/auth.h>
13#include <linux/ceph/pagelist.h> 13#include <linux/ceph/pagelist.h>
14 14
15/*
16 * Maximum object name size
17 * (must be at least as big as RBD_MAX_MD_NAME_LEN -- currently 100)
18 */
19#define MAX_OBJ_NAME_SIZE 100
20
21struct ceph_msg; 15struct ceph_msg;
22struct ceph_snap_context; 16struct ceph_snap_context;
23struct ceph_osd_request; 17struct ceph_osd_request;
@@ -138,6 +132,7 @@ struct ceph_osd_request {
138 __le64 *r_request_pool; 132 __le64 *r_request_pool;
139 void *r_request_pgid; 133 void *r_request_pgid;
140 __le32 *r_request_attempts; 134 __le32 *r_request_attempts;
135 bool r_paused;
141 struct ceph_eversion *r_request_reassert_version; 136 struct ceph_eversion *r_request_reassert_version;
142 137
143 int r_result; 138 int r_result;
@@ -158,15 +153,21 @@ struct ceph_osd_request {
158 struct inode *r_inode; /* for use by callbacks */ 153 struct inode *r_inode; /* for use by callbacks */
159 void *r_priv; /* ditto */ 154 void *r_priv; /* ditto */
160 155
161 char r_oid[MAX_OBJ_NAME_SIZE]; /* object name */ 156 struct ceph_object_locator r_base_oloc;
162 int r_oid_len; 157 struct ceph_object_id r_base_oid;
158 struct ceph_object_locator r_target_oloc;
159 struct ceph_object_id r_target_oid;
160
163 u64 r_snapid; 161 u64 r_snapid;
164 unsigned long r_stamp; /* send OR check time */ 162 unsigned long r_stamp; /* send OR check time */
165 163
166 struct ceph_file_layout r_file_layout;
167 struct ceph_snap_context *r_snapc; /* snap context for writes */ 164 struct ceph_snap_context *r_snapc; /* snap context for writes */
168}; 165};
169 166
167struct ceph_request_redirect {
168 struct ceph_object_locator oloc;
169};
170
170struct ceph_osd_event { 171struct ceph_osd_event {
171 u64 cookie; 172 u64 cookie;
172 int one_shot; 173 int one_shot;
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index d05cc4451af6..49ff69f0746b 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -35,13 +35,26 @@ struct ceph_pg_pool_info {
35 u8 object_hash; 35 u8 object_hash;
36 u32 pg_num, pgp_num; 36 u32 pg_num, pgp_num;
37 int pg_num_mask, pgp_num_mask; 37 int pg_num_mask, pgp_num_mask;
38 s64 read_tier;
39 s64 write_tier; /* wins for read+write ops */
38 u64 flags; 40 u64 flags;
39 char *name; 41 char *name;
40}; 42};
41 43
42struct ceph_object_locator { 44struct ceph_object_locator {
43 uint64_t pool; 45 s64 pool;
44 char *key; 46};
47
48/*
49 * Maximum supported by kernel client object name length
50 *
51 * (probably outdated: must be >= RBD_MAX_MD_NAME_LEN -- currently 100)
52 */
53#define CEPH_MAX_OID_NAME_LEN 100
54
55struct ceph_object_id {
56 char name[CEPH_MAX_OID_NAME_LEN];
57 int name_len;
45}; 58};
46 59
47struct ceph_pg_mapping { 60struct ceph_pg_mapping {
@@ -73,33 +86,30 @@ struct ceph_osdmap {
73 struct crush_map *crush; 86 struct crush_map *crush;
74}; 87};
75 88
76/* 89static inline void ceph_oid_set_name(struct ceph_object_id *oid,
77 * file layout helpers 90 const char *name)
78 */
79#define ceph_file_layout_su(l) ((__s32)le32_to_cpu((l).fl_stripe_unit))
80#define ceph_file_layout_stripe_count(l) \
81 ((__s32)le32_to_cpu((l).fl_stripe_count))
82#define ceph_file_layout_object_size(l) ((__s32)le32_to_cpu((l).fl_object_size))
83#define ceph_file_layout_cas_hash(l) ((__s32)le32_to_cpu((l).fl_cas_hash))
84#define ceph_file_layout_object_su(l) \
85 ((__s32)le32_to_cpu((l).fl_object_stripe_unit))
86#define ceph_file_layout_pg_pool(l) \
87 ((__s32)le32_to_cpu((l).fl_pg_pool))
88
89static inline unsigned ceph_file_layout_stripe_width(struct ceph_file_layout *l)
90{ 91{
91 return le32_to_cpu(l->fl_stripe_unit) * 92 int len;
92 le32_to_cpu(l->fl_stripe_count); 93
94 len = strlen(name);
95 if (len > sizeof(oid->name)) {
96 WARN(1, "ceph_oid_set_name '%s' len %d vs %zu, truncating\n",
97 name, len, sizeof(oid->name));
98 len = sizeof(oid->name);
99 }
100
101 memcpy(oid->name, name, len);
102 oid->name_len = len;
93} 103}
94 104
95/* "period" == bytes before i start on a new set of objects */ 105static inline void ceph_oid_copy(struct ceph_object_id *dest,
96static inline unsigned ceph_file_layout_period(struct ceph_file_layout *l) 106 struct ceph_object_id *src)
97{ 107{
98 return le32_to_cpu(l->fl_object_size) * 108 BUG_ON(src->name_len > sizeof(dest->name));
99 le32_to_cpu(l->fl_stripe_count); 109 memcpy(dest->name, src->name, src->name_len);
110 dest->name_len = src->name_len;
100} 111}
101 112
102
103static inline int ceph_osd_is_up(struct ceph_osdmap *map, int osd) 113static inline int ceph_osd_is_up(struct ceph_osdmap *map, int osd)
104{ 114{
105 return (osd < map->max_osd) && (map->osd_state[osd] & CEPH_OSD_UP); 115 return (osd < map->max_osd) && (map->osd_state[osd] & CEPH_OSD_UP);
@@ -155,14 +165,20 @@ extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
155 u64 *bno, u64 *oxoff, u64 *oxlen); 165 u64 *bno, u64 *oxoff, u64 *oxlen);
156 166
157/* calculate mapping of object to a placement group */ 167/* calculate mapping of object to a placement group */
158extern int ceph_calc_ceph_pg(struct ceph_pg *pg, const char *oid, 168extern int ceph_oloc_oid_to_pg(struct ceph_osdmap *osdmap,
159 struct ceph_osdmap *osdmap, uint64_t pool); 169 struct ceph_object_locator *oloc,
170 struct ceph_object_id *oid,
171 struct ceph_pg *pg_out);
172
160extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, 173extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap,
161 struct ceph_pg pgid, 174 struct ceph_pg pgid,
162 int *acting); 175 int *acting);
163extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, 176extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap,
164 struct ceph_pg pgid); 177 struct ceph_pg pgid);
165 178
179extern struct ceph_pg_pool_info *ceph_pg_pool_by_id(struct ceph_osdmap *map,
180 u64 id);
181
166extern const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id); 182extern const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id);
167extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name); 183extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name);
168 184
diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h
index 68c96a508ac2..96292df4041b 100644
--- a/include/linux/ceph/rados.h
+++ b/include/linux/ceph/rados.h
@@ -344,6 +344,10 @@ enum {
344 CEPH_OSD_FLAG_EXEC_PUBLIC = 0x1000, /* DEPRECATED op may exec (public) */ 344 CEPH_OSD_FLAG_EXEC_PUBLIC = 0x1000, /* DEPRECATED op may exec (public) */
345 CEPH_OSD_FLAG_LOCALIZE_READS = 0x2000, /* read from nearby replica, if any */ 345 CEPH_OSD_FLAG_LOCALIZE_READS = 0x2000, /* read from nearby replica, if any */
346 CEPH_OSD_FLAG_RWORDERED = 0x4000, /* order wrt concurrent reads */ 346 CEPH_OSD_FLAG_RWORDERED = 0x4000, /* order wrt concurrent reads */
347 CEPH_OSD_FLAG_IGNORE_CACHE = 0x8000, /* ignore cache logic */
348 CEPH_OSD_FLAG_SKIPRWLOCKS = 0x10000, /* skip rw locks */
349 CEPH_OSD_FLAG_IGNORE_OVERLAY = 0x20000, /* ignore pool overlay */
350 CEPH_OSD_FLAG_FLUSH = 0x40000, /* this is part of flush */
347}; 351};
348 352
349enum { 353enum {
diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h
index 6a1101f24cfb..acaa5615d634 100644
--- a/include/linux/crush/crush.h
+++ b/include/linux/crush/crush.h
@@ -19,11 +19,12 @@
19 19
20#define CRUSH_MAGIC 0x00010000ul /* for detecting algorithm revisions */ 20#define CRUSH_MAGIC 0x00010000ul /* for detecting algorithm revisions */
21 21
22
23#define CRUSH_MAX_DEPTH 10 /* max crush hierarchy depth */ 22#define CRUSH_MAX_DEPTH 10 /* max crush hierarchy depth */
24#define CRUSH_MAX_SET 10 /* max size of a mapping result */
25 23
26 24
25#define CRUSH_ITEM_UNDEF 0x7ffffffe /* undefined result (internal use only) */
26#define CRUSH_ITEM_NONE 0x7fffffff /* no result */
27
27/* 28/*
28 * CRUSH uses user-defined "rules" to describe how inputs should be 29 * CRUSH uses user-defined "rules" to describe how inputs should be
29 * mapped to devices. A rule consists of sequence of steps to perform 30 * mapped to devices. A rule consists of sequence of steps to perform
@@ -43,8 +44,13 @@ enum {
43 /* arg2 = type */ 44 /* arg2 = type */
44 CRUSH_RULE_CHOOSE_INDEP = 3, /* same */ 45 CRUSH_RULE_CHOOSE_INDEP = 3, /* same */
45 CRUSH_RULE_EMIT = 4, /* no args */ 46 CRUSH_RULE_EMIT = 4, /* no args */
46 CRUSH_RULE_CHOOSE_LEAF_FIRSTN = 6, 47 CRUSH_RULE_CHOOSELEAF_FIRSTN = 6,
47 CRUSH_RULE_CHOOSE_LEAF_INDEP = 7, 48 CRUSH_RULE_CHOOSELEAF_INDEP = 7,
49
50 CRUSH_RULE_SET_CHOOSE_TRIES = 8, /* override choose_total_tries */
51 CRUSH_RULE_SET_CHOOSELEAF_TRIES = 9, /* override chooseleaf_descend_once */
52 CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES = 10,
53 CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES = 11,
48}; 54};
49 55
50/* 56/*
@@ -162,7 +168,10 @@ struct crush_map {
162 __u32 choose_local_fallback_tries; 168 __u32 choose_local_fallback_tries;
163 /* choose attempts before giving up */ 169 /* choose attempts before giving up */
164 __u32 choose_total_tries; 170 __u32 choose_total_tries;
165 /* attempt chooseleaf inner descent once; on failure retry outer descent */ 171 /* attempt chooseleaf inner descent once for firstn mode; on
172 * reject retry outer descent. Note that this does *not*
173 * apply to a collision: in that case we will retry as we used
174 * to. */
166 __u32 chooseleaf_descend_once; 175 __u32 chooseleaf_descend_once;
167}; 176};
168 177
@@ -174,6 +183,7 @@ extern void crush_destroy_bucket_list(struct crush_bucket_list *b);
174extern void crush_destroy_bucket_tree(struct crush_bucket_tree *b); 183extern void crush_destroy_bucket_tree(struct crush_bucket_tree *b);
175extern void crush_destroy_bucket_straw(struct crush_bucket_straw *b); 184extern void crush_destroy_bucket_straw(struct crush_bucket_straw *b);
176extern void crush_destroy_bucket(struct crush_bucket *b); 185extern void crush_destroy_bucket(struct crush_bucket *b);
186extern void crush_destroy_rule(struct crush_rule *r);
177extern void crush_destroy(struct crush_map *map); 187extern void crush_destroy(struct crush_map *map);
178 188
179static inline int crush_calc_tree_node(int i) 189static inline int crush_calc_tree_node(int i)
diff --git a/include/linux/crush/mapper.h b/include/linux/crush/mapper.h
index 5772dee3ecbf..eab367446eea 100644
--- a/include/linux/crush/mapper.h
+++ b/include/linux/crush/mapper.h
@@ -14,6 +14,7 @@ extern int crush_find_rule(const struct crush_map *map, int ruleset, int type, i
14extern int crush_do_rule(const struct crush_map *map, 14extern int crush_do_rule(const struct crush_map *map,
15 int ruleno, 15 int ruleno,
16 int x, int *result, int result_max, 16 int x, int *result, int result_max,
17 const __u32 *weights); 17 const __u32 *weights, int weight_max,
18 int *scratch);
18 19
19#endif 20#endif