diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-05-06 16:11:19 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-05-06 16:11:19 -0400 |
| commit | 91f8575685e35f3bd021286bc82d26397458f5a9 (patch) | |
| tree | 09de8d889758a12071adb9427ed741e27c907aa6 /include/linux | |
| parent | 2e378f3eebd28feefbb1f9953834a5a19482f053 (diff) | |
| parent | b5b09be30cf99f9c699e825629f02e3bce555d44 (diff) | |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
Pull Ceph changes from Alex Elder:
"This is a big pull.
Most of it is the culmination of Alex's work to implement RBD image
layering, which is now complete (yay!).
There is also some work from Yan to fix i_mutex behavior surrounding
writes in cephfs, a sync write fix, a fix for RBD images that get
resized while they are mapped, and a few patches from me that resolve
annoying auth warnings and fix several bugs in the ceph auth code."
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (254 commits)
rbd: fix image request leak on parent read
libceph: use slab cache for osd client requests
libceph: allocate ceph message data with a slab allocator
libceph: allocate ceph messages with a slab allocator
rbd: allocate image object names with a slab allocator
rbd: allocate object requests with a slab allocator
rbd: allocate name separate from obj_request
rbd: allocate image requests with a slab allocator
rbd: use binary search for snapshot lookup
rbd: clear EXISTS flag if mapped snapshot disappears
rbd: kill off the snapshot list
rbd: define rbd_snap_size() and rbd_snap_features()
rbd: use snap_id not index to look up snap info
rbd: look up snapshot name in names buffer
rbd: drop obj_request->version
rbd: drop rbd_obj_method_sync() version parameter
rbd: more version parameter removal
rbd: get rid of some version parameters
rbd: stop tracking header object version
rbd: snap names are pointer to constant data
...
Diffstat (limited to 'include/linux')
| -rw-r--r-- | include/linux/ceph/auth.h | 18 | ||||
| -rw-r--r-- | include/linux/ceph/ceph_features.h | 2 | ||||
| -rw-r--r-- | include/linux/ceph/decode.h | 30 | ||||
| -rw-r--r-- | include/linux/ceph/libceph.h | 31 | ||||
| -rw-r--r-- | include/linux/ceph/messenger.h | 104 | ||||
| -rw-r--r-- | include/linux/ceph/msgr.h | 1 | ||||
| -rw-r--r-- | include/linux/ceph/osd_client.h | 204 | ||||
| -rw-r--r-- | include/linux/ceph/osdmap.h | 30 |
8 files changed, 299 insertions, 121 deletions
diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h index d4080f309b56..5f3386844134 100644 --- a/include/linux/ceph/auth.h +++ b/include/linux/ceph/auth.h | |||
| @@ -52,6 +52,9 @@ struct ceph_auth_client_ops { | |||
| 52 | */ | 52 | */ |
| 53 | int (*create_authorizer)(struct ceph_auth_client *ac, int peer_type, | 53 | int (*create_authorizer)(struct ceph_auth_client *ac, int peer_type, |
| 54 | struct ceph_auth_handshake *auth); | 54 | struct ceph_auth_handshake *auth); |
| 55 | /* ensure that an existing authorizer is up to date */ | ||
| 56 | int (*update_authorizer)(struct ceph_auth_client *ac, int peer_type, | ||
| 57 | struct ceph_auth_handshake *auth); | ||
| 55 | int (*verify_authorizer_reply)(struct ceph_auth_client *ac, | 58 | int (*verify_authorizer_reply)(struct ceph_auth_client *ac, |
| 56 | struct ceph_authorizer *a, size_t len); | 59 | struct ceph_authorizer *a, size_t len); |
| 57 | void (*destroy_authorizer)(struct ceph_auth_client *ac, | 60 | void (*destroy_authorizer)(struct ceph_auth_client *ac, |
| @@ -75,6 +78,8 @@ struct ceph_auth_client { | |||
| 75 | u64 global_id; /* our unique id in system */ | 78 | u64 global_id; /* our unique id in system */ |
| 76 | const struct ceph_crypto_key *key; /* our secret key */ | 79 | const struct ceph_crypto_key *key; /* our secret key */ |
| 77 | unsigned want_keys; /* which services we want */ | 80 | unsigned want_keys; /* which services we want */ |
| 81 | |||
| 82 | struct mutex mutex; | ||
| 78 | }; | 83 | }; |
| 79 | 84 | ||
| 80 | extern struct ceph_auth_client *ceph_auth_init(const char *name, | 85 | extern struct ceph_auth_client *ceph_auth_init(const char *name, |
| @@ -94,5 +99,18 @@ extern int ceph_build_auth(struct ceph_auth_client *ac, | |||
| 94 | void *msg_buf, size_t msg_len); | 99 | void *msg_buf, size_t msg_len); |
| 95 | 100 | ||
| 96 | extern int ceph_auth_is_authenticated(struct ceph_auth_client *ac); | 101 | extern int ceph_auth_is_authenticated(struct ceph_auth_client *ac); |
| 102 | extern int ceph_auth_create_authorizer(struct ceph_auth_client *ac, | ||
| 103 | int peer_type, | ||
| 104 | struct ceph_auth_handshake *auth); | ||
| 105 | extern void ceph_auth_destroy_authorizer(struct ceph_auth_client *ac, | ||
| 106 | struct ceph_authorizer *a); | ||
| 107 | extern int ceph_auth_update_authorizer(struct ceph_auth_client *ac, | ||
| 108 | int peer_type, | ||
| 109 | struct ceph_auth_handshake *a); | ||
| 110 | extern int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac, | ||
| 111 | struct ceph_authorizer *a, | ||
| 112 | size_t len); | ||
| 113 | extern void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac, | ||
| 114 | int peer_type); | ||
| 97 | 115 | ||
| 98 | #endif | 116 | #endif |
diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 76554cecaab2..4c42080347af 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h | |||
| @@ -41,6 +41,7 @@ | |||
| 41 | */ | 41 | */ |
| 42 | #define CEPH_FEATURES_SUPPORTED_DEFAULT \ | 42 | #define CEPH_FEATURES_SUPPORTED_DEFAULT \ |
| 43 | (CEPH_FEATURE_NOSRCADDR | \ | 43 | (CEPH_FEATURE_NOSRCADDR | \ |
| 44 | CEPH_FEATURE_RECONNECT_SEQ | \ | ||
| 44 | CEPH_FEATURE_PGID64 | \ | 45 | CEPH_FEATURE_PGID64 | \ |
| 45 | CEPH_FEATURE_PGPOOL3 | \ | 46 | CEPH_FEATURE_PGPOOL3 | \ |
| 46 | CEPH_FEATURE_OSDENC | \ | 47 | CEPH_FEATURE_OSDENC | \ |
| @@ -51,6 +52,7 @@ | |||
| 51 | 52 | ||
| 52 | #define CEPH_FEATURES_REQUIRED_DEFAULT \ | 53 | #define CEPH_FEATURES_REQUIRED_DEFAULT \ |
| 53 | (CEPH_FEATURE_NOSRCADDR | \ | 54 | (CEPH_FEATURE_NOSRCADDR | \ |
| 55 | CEPH_FEATURE_RECONNECT_SEQ | \ | ||
| 54 | CEPH_FEATURE_PGID64 | \ | 56 | CEPH_FEATURE_PGID64 | \ |
| 55 | CEPH_FEATURE_PGPOOL3 | \ | 57 | CEPH_FEATURE_PGPOOL3 | \ |
| 56 | CEPH_FEATURE_OSDENC) | 58 | CEPH_FEATURE_OSDENC) |
diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h index 360d9d08ca9e..379f71508995 100644 --- a/include/linux/ceph/decode.h +++ b/include/linux/ceph/decode.h | |||
| @@ -8,6 +8,23 @@ | |||
| 8 | 8 | ||
| 9 | #include <linux/ceph/types.h> | 9 | #include <linux/ceph/types.h> |
| 10 | 10 | ||
| 11 | /* This seemed to be the easiest place to define these */ | ||
| 12 | |||
| 13 | #define U8_MAX ((u8)(~0U)) | ||
| 14 | #define U16_MAX ((u16)(~0U)) | ||
| 15 | #define U32_MAX ((u32)(~0U)) | ||
| 16 | #define U64_MAX ((u64)(~0ULL)) | ||
| 17 | |||
| 18 | #define S8_MAX ((s8)(U8_MAX >> 1)) | ||
| 19 | #define S16_MAX ((s16)(U16_MAX >> 1)) | ||
| 20 | #define S32_MAX ((s32)(U32_MAX >> 1)) | ||
| 21 | #define S64_MAX ((s64)(U64_MAX >> 1LL)) | ||
| 22 | |||
| 23 | #define S8_MIN ((s8)(-S8_MAX - 1)) | ||
| 24 | #define S16_MIN ((s16)(-S16_MAX - 1)) | ||
| 25 | #define S32_MIN ((s32)(-S32_MAX - 1)) | ||
| 26 | #define S64_MIN ((s64)(-S64_MAX - 1LL)) | ||
| 27 | |||
| 11 | /* | 28 | /* |
| 12 | * in all cases, | 29 | * in all cases, |
| 13 | * void **p pointer to position pointer | 30 | * void **p pointer to position pointer |
| @@ -137,14 +154,19 @@ bad: | |||
| 137 | static inline void ceph_decode_timespec(struct timespec *ts, | 154 | static inline void ceph_decode_timespec(struct timespec *ts, |
| 138 | const struct ceph_timespec *tv) | 155 | const struct ceph_timespec *tv) |
| 139 | { | 156 | { |
| 140 | ts->tv_sec = le32_to_cpu(tv->tv_sec); | 157 | ts->tv_sec = (__kernel_time_t)le32_to_cpu(tv->tv_sec); |
| 141 | ts->tv_nsec = le32_to_cpu(tv->tv_nsec); | 158 | ts->tv_nsec = (long)le32_to_cpu(tv->tv_nsec); |
| 142 | } | 159 | } |
| 143 | static inline void ceph_encode_timespec(struct ceph_timespec *tv, | 160 | static inline void ceph_encode_timespec(struct ceph_timespec *tv, |
| 144 | const struct timespec *ts) | 161 | const struct timespec *ts) |
| 145 | { | 162 | { |
| 146 | tv->tv_sec = cpu_to_le32(ts->tv_sec); | 163 | BUG_ON(ts->tv_sec < 0); |
| 147 | tv->tv_nsec = cpu_to_le32(ts->tv_nsec); | 164 | BUG_ON(ts->tv_sec > (__kernel_time_t)U32_MAX); |
| 165 | BUG_ON(ts->tv_nsec < 0); | ||
| 166 | BUG_ON(ts->tv_nsec > (long)U32_MAX); | ||
| 167 | |||
| 168 | tv->tv_sec = cpu_to_le32((u32)ts->tv_sec); | ||
| 169 | tv->tv_nsec = cpu_to_le32((u32)ts->tv_nsec); | ||
| 148 | } | 170 | } |
| 149 | 171 | ||
| 150 | /* | 172 | /* |
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 29818fc3fa49..2e3024881a5e 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h | |||
| @@ -66,6 +66,7 @@ struct ceph_options { | |||
| 66 | #define CEPH_OSD_IDLE_TTL_DEFAULT 60 | 66 | #define CEPH_OSD_IDLE_TTL_DEFAULT 60 |
| 67 | 67 | ||
| 68 | #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) | 68 | #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) |
| 69 | #define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024) | ||
| 69 | #define CEPH_MSG_MAX_DATA_LEN (16*1024*1024) | 70 | #define CEPH_MSG_MAX_DATA_LEN (16*1024*1024) |
| 70 | 71 | ||
| 71 | #define CEPH_AUTH_NAME_DEFAULT "guest" | 72 | #define CEPH_AUTH_NAME_DEFAULT "guest" |
| @@ -156,31 +157,11 @@ struct ceph_snap_context { | |||
| 156 | u64 snaps[]; | 157 | u64 snaps[]; |
| 157 | }; | 158 | }; |
| 158 | 159 | ||
| 159 | static inline struct ceph_snap_context * | 160 | extern struct ceph_snap_context *ceph_create_snap_context(u32 snap_count, |
| 160 | ceph_get_snap_context(struct ceph_snap_context *sc) | 161 | gfp_t gfp_flags); |
| 161 | { | 162 | extern struct ceph_snap_context *ceph_get_snap_context( |
| 162 | /* | 163 | struct ceph_snap_context *sc); |
| 163 | printk("get_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref), | 164 | extern void ceph_put_snap_context(struct ceph_snap_context *sc); |
| 164 | atomic_read(&sc->nref)+1); | ||
| 165 | */ | ||
| 166 | if (sc) | ||
| 167 | atomic_inc(&sc->nref); | ||
| 168 | return sc; | ||
| 169 | } | ||
| 170 | |||
| 171 | static inline void ceph_put_snap_context(struct ceph_snap_context *sc) | ||
| 172 | { | ||
| 173 | if (!sc) | ||
| 174 | return; | ||
| 175 | /* | ||
| 176 | printk("put_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref), | ||
| 177 | atomic_read(&sc->nref)-1); | ||
| 178 | */ | ||
| 179 | if (atomic_dec_and_test(&sc->nref)) { | ||
| 180 | /*printk(" deleting snap_context %p\n", sc);*/ | ||
| 181 | kfree(sc); | ||
| 182 | } | ||
| 183 | } | ||
| 184 | 165 | ||
| 185 | /* | 166 | /* |
| 186 | * calculate the number of pages a given length and offset map onto, | 167 | * calculate the number of pages a given length and offset map onto, |
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 60903e0f665c..7c1420bb1dce 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h | |||
| @@ -64,6 +64,77 @@ struct ceph_messenger { | |||
| 64 | u32 required_features; | 64 | u32 required_features; |
| 65 | }; | 65 | }; |
| 66 | 66 | ||
| 67 | enum ceph_msg_data_type { | ||
| 68 | CEPH_MSG_DATA_NONE, /* message contains no data payload */ | ||
| 69 | CEPH_MSG_DATA_PAGES, /* data source/destination is a page array */ | ||
| 70 | CEPH_MSG_DATA_PAGELIST, /* data source/destination is a pagelist */ | ||
| 71 | #ifdef CONFIG_BLOCK | ||
| 72 | CEPH_MSG_DATA_BIO, /* data source/destination is a bio list */ | ||
| 73 | #endif /* CONFIG_BLOCK */ | ||
| 74 | }; | ||
| 75 | |||
| 76 | static __inline__ bool ceph_msg_data_type_valid(enum ceph_msg_data_type type) | ||
| 77 | { | ||
| 78 | switch (type) { | ||
| 79 | case CEPH_MSG_DATA_NONE: | ||
| 80 | case CEPH_MSG_DATA_PAGES: | ||
| 81 | case CEPH_MSG_DATA_PAGELIST: | ||
| 82 | #ifdef CONFIG_BLOCK | ||
| 83 | case CEPH_MSG_DATA_BIO: | ||
| 84 | #endif /* CONFIG_BLOCK */ | ||
| 85 | return true; | ||
| 86 | default: | ||
| 87 | return false; | ||
| 88 | } | ||
| 89 | } | ||
| 90 | |||
| 91 | struct ceph_msg_data { | ||
| 92 | struct list_head links; /* ceph_msg->data */ | ||
| 93 | enum ceph_msg_data_type type; | ||
| 94 | union { | ||
| 95 | #ifdef CONFIG_BLOCK | ||
| 96 | struct { | ||
| 97 | struct bio *bio; | ||
| 98 | size_t bio_length; | ||
| 99 | }; | ||
| 100 | #endif /* CONFIG_BLOCK */ | ||
| 101 | struct { | ||
| 102 | struct page **pages; /* NOT OWNER. */ | ||
| 103 | size_t length; /* total # bytes */ | ||
| 104 | unsigned int alignment; /* first page */ | ||
| 105 | }; | ||
| 106 | struct ceph_pagelist *pagelist; | ||
| 107 | }; | ||
| 108 | }; | ||
| 109 | |||
| 110 | struct ceph_msg_data_cursor { | ||
| 111 | size_t total_resid; /* across all data items */ | ||
| 112 | struct list_head *data_head; /* = &ceph_msg->data */ | ||
| 113 | |||
| 114 | struct ceph_msg_data *data; /* current data item */ | ||
| 115 | size_t resid; /* bytes not yet consumed */ | ||
| 116 | bool last_piece; /* current is last piece */ | ||
| 117 | bool need_crc; /* crc update needed */ | ||
| 118 | union { | ||
| 119 | #ifdef CONFIG_BLOCK | ||
| 120 | struct { /* bio */ | ||
| 121 | struct bio *bio; /* bio from list */ | ||
| 122 | unsigned int vector_index; /* vector from bio */ | ||
| 123 | unsigned int vector_offset; /* bytes from vector */ | ||
| 124 | }; | ||
| 125 | #endif /* CONFIG_BLOCK */ | ||
| 126 | struct { /* pages */ | ||
| 127 | unsigned int page_offset; /* offset in page */ | ||
| 128 | unsigned short page_index; /* index in array */ | ||
| 129 | unsigned short page_count; /* pages in array */ | ||
| 130 | }; | ||
| 131 | struct { /* pagelist */ | ||
| 132 | struct page *page; /* page from list */ | ||
| 133 | size_t offset; /* bytes from list */ | ||
| 134 | }; | ||
| 135 | }; | ||
| 136 | }; | ||
| 137 | |||
| 67 | /* | 138 | /* |
| 68 | * a single message. it contains a header (src, dest, message type, etc.), | 139 | * a single message. it contains a header (src, dest, message type, etc.), |
| 69 | * footer (crc values, mainly), a "front" message body, and possibly a | 140 | * footer (crc values, mainly), a "front" message body, and possibly a |
| @@ -74,21 +145,15 @@ struct ceph_msg { | |||
| 74 | struct ceph_msg_footer footer; /* footer */ | 145 | struct ceph_msg_footer footer; /* footer */ |
| 75 | struct kvec front; /* unaligned blobs of message */ | 146 | struct kvec front; /* unaligned blobs of message */ |
| 76 | struct ceph_buffer *middle; | 147 | struct ceph_buffer *middle; |
| 77 | struct page **pages; /* data payload. NOT OWNER. */ | 148 | |
| 78 | unsigned nr_pages; /* size of page array */ | 149 | size_t data_length; |
| 79 | unsigned page_alignment; /* io offset in first page */ | 150 | struct list_head data; |
| 80 | struct ceph_pagelist *pagelist; /* instead of pages */ | 151 | struct ceph_msg_data_cursor cursor; |
| 81 | 152 | ||
| 82 | struct ceph_connection *con; | 153 | struct ceph_connection *con; |
| 83 | struct list_head list_head; | 154 | struct list_head list_head; /* links for connection lists */ |
| 84 | 155 | ||
| 85 | struct kref kref; | 156 | struct kref kref; |
| 86 | #ifdef CONFIG_BLOCK | ||
| 87 | struct bio *bio; /* instead of pages/pagelist */ | ||
| 88 | struct bio *bio_iter; /* bio iterator */ | ||
| 89 | int bio_seg; /* current bio segment */ | ||
| 90 | #endif /* CONFIG_BLOCK */ | ||
| 91 | struct ceph_pagelist *trail; /* the trailing part of the data */ | ||
| 92 | bool front_is_vmalloc; | 157 | bool front_is_vmalloc; |
| 93 | bool more_to_follow; | 158 | bool more_to_follow; |
| 94 | bool needs_out_seq; | 159 | bool needs_out_seq; |
| @@ -98,12 +163,6 @@ struct ceph_msg { | |||
| 98 | struct ceph_msgpool *pool; | 163 | struct ceph_msgpool *pool; |
| 99 | }; | 164 | }; |
| 100 | 165 | ||
| 101 | struct ceph_msg_pos { | ||
| 102 | int page, page_pos; /* which page; offset in page */ | ||
| 103 | int data_pos; /* offset in data payload */ | ||
| 104 | bool did_page_crc; /* true if we've calculated crc for current page */ | ||
| 105 | }; | ||
| 106 | |||
| 107 | /* ceph connection fault delay defaults, for exponential backoff */ | 166 | /* ceph connection fault delay defaults, for exponential backoff */ |
| 108 | #define BASE_DELAY_INTERVAL (HZ/2) | 167 | #define BASE_DELAY_INTERVAL (HZ/2) |
| 109 | #define MAX_DELAY_INTERVAL (5 * 60 * HZ) | 168 | #define MAX_DELAY_INTERVAL (5 * 60 * HZ) |
| @@ -161,7 +220,6 @@ struct ceph_connection { | |||
| 161 | struct ceph_msg *out_msg; /* sending message (== tail of | 220 | struct ceph_msg *out_msg; /* sending message (== tail of |
| 162 | out_sent) */ | 221 | out_sent) */ |
| 163 | bool out_msg_done; | 222 | bool out_msg_done; |
| 164 | struct ceph_msg_pos out_msg_pos; | ||
| 165 | 223 | ||
| 166 | struct kvec out_kvec[8], /* sending header/footer data */ | 224 | struct kvec out_kvec[8], /* sending header/footer data */ |
| 167 | *out_kvec_cur; | 225 | *out_kvec_cur; |
| @@ -175,7 +233,6 @@ struct ceph_connection { | |||
| 175 | /* message in temps */ | 233 | /* message in temps */ |
| 176 | struct ceph_msg_header in_hdr; | 234 | struct ceph_msg_header in_hdr; |
| 177 | struct ceph_msg *in_msg; | 235 | struct ceph_msg *in_msg; |
| 178 | struct ceph_msg_pos in_msg_pos; | ||
| 179 | u32 in_front_crc, in_middle_crc, in_data_crc; /* calculated crc */ | 236 | u32 in_front_crc, in_middle_crc, in_data_crc; /* calculated crc */ |
| 180 | 237 | ||
| 181 | char in_tag; /* protocol control byte */ | 238 | char in_tag; /* protocol control byte */ |
| @@ -218,6 +275,15 @@ extern void ceph_msg_revoke_incoming(struct ceph_msg *msg); | |||
| 218 | 275 | ||
| 219 | extern void ceph_con_keepalive(struct ceph_connection *con); | 276 | extern void ceph_con_keepalive(struct ceph_connection *con); |
| 220 | 277 | ||
| 278 | extern void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages, | ||
| 279 | size_t length, size_t alignment); | ||
| 280 | extern void ceph_msg_data_add_pagelist(struct ceph_msg *msg, | ||
| 281 | struct ceph_pagelist *pagelist); | ||
| 282 | #ifdef CONFIG_BLOCK | ||
| 283 | extern void ceph_msg_data_add_bio(struct ceph_msg *msg, struct bio *bio, | ||
| 284 | size_t length); | ||
| 285 | #endif /* CONFIG_BLOCK */ | ||
| 286 | |||
| 221 | extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags, | 287 | extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags, |
| 222 | bool can_fail); | 288 | bool can_fail); |
| 223 | extern void ceph_msg_kfree(struct ceph_msg *m); | 289 | extern void ceph_msg_kfree(struct ceph_msg *m); |
diff --git a/include/linux/ceph/msgr.h b/include/linux/ceph/msgr.h index 680d3d648cac..3d94a73b5f30 100644 --- a/include/linux/ceph/msgr.h +++ b/include/linux/ceph/msgr.h | |||
| @@ -87,6 +87,7 @@ struct ceph_entity_inst { | |||
| 87 | #define CEPH_MSGR_TAG_BADPROTOVER 10 /* bad protocol version */ | 87 | #define CEPH_MSGR_TAG_BADPROTOVER 10 /* bad protocol version */ |
| 88 | #define CEPH_MSGR_TAG_BADAUTHORIZER 11 /* bad authorizer */ | 88 | #define CEPH_MSGR_TAG_BADAUTHORIZER 11 /* bad authorizer */ |
| 89 | #define CEPH_MSGR_TAG_FEATURES 12 /* insufficient features */ | 89 | #define CEPH_MSGR_TAG_FEATURES 12 /* insufficient features */ |
| 90 | #define CEPH_MSGR_TAG_SEQ 13 /* 64-bit int follows with seen seq number */ | ||
| 90 | 91 | ||
| 91 | 92 | ||
| 92 | /* | 93 | /* |
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 1dd5d466b6f9..186db0bf4951 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h | |||
| @@ -29,6 +29,7 @@ struct ceph_authorizer; | |||
| 29 | */ | 29 | */ |
| 30 | typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *, | 30 | typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *, |
| 31 | struct ceph_msg *); | 31 | struct ceph_msg *); |
| 32 | typedef void (*ceph_osdc_unsafe_callback_t)(struct ceph_osd_request *, bool); | ||
| 32 | 33 | ||
| 33 | /* a given osd we're communicating with */ | 34 | /* a given osd we're communicating with */ |
| 34 | struct ceph_osd { | 35 | struct ceph_osd { |
| @@ -48,7 +49,67 @@ struct ceph_osd { | |||
| 48 | }; | 49 | }; |
| 49 | 50 | ||
| 50 | 51 | ||
| 51 | #define CEPH_OSD_MAX_OP 10 | 52 | #define CEPH_OSD_MAX_OP 2 |
| 53 | |||
| 54 | enum ceph_osd_data_type { | ||
| 55 | CEPH_OSD_DATA_TYPE_NONE = 0, | ||
| 56 | CEPH_OSD_DATA_TYPE_PAGES, | ||
| 57 | CEPH_OSD_DATA_TYPE_PAGELIST, | ||
| 58 | #ifdef CONFIG_BLOCK | ||
| 59 | CEPH_OSD_DATA_TYPE_BIO, | ||
| 60 | #endif /* CONFIG_BLOCK */ | ||
| 61 | }; | ||
| 62 | |||
| 63 | struct ceph_osd_data { | ||
| 64 | enum ceph_osd_data_type type; | ||
| 65 | union { | ||
| 66 | struct { | ||
| 67 | struct page **pages; | ||
| 68 | u64 length; | ||
| 69 | u32 alignment; | ||
| 70 | bool pages_from_pool; | ||
| 71 | bool own_pages; | ||
| 72 | }; | ||
| 73 | struct ceph_pagelist *pagelist; | ||
| 74 | #ifdef CONFIG_BLOCK | ||
| 75 | struct { | ||
| 76 | struct bio *bio; /* list of bios */ | ||
| 77 | size_t bio_length; /* total in list */ | ||
| 78 | }; | ||
| 79 | #endif /* CONFIG_BLOCK */ | ||
| 80 | }; | ||
| 81 | }; | ||
| 82 | |||
| 83 | struct ceph_osd_req_op { | ||
| 84 | u16 op; /* CEPH_OSD_OP_* */ | ||
| 85 | u32 payload_len; | ||
| 86 | union { | ||
| 87 | struct ceph_osd_data raw_data_in; | ||
| 88 | struct { | ||
| 89 | u64 offset, length; | ||
| 90 | u64 truncate_size; | ||
| 91 | u32 truncate_seq; | ||
| 92 | struct ceph_osd_data osd_data; | ||
| 93 | } extent; | ||
| 94 | struct { | ||
| 95 | const char *class_name; | ||
| 96 | const char *method_name; | ||
| 97 | struct ceph_osd_data request_info; | ||
| 98 | struct ceph_osd_data request_data; | ||
| 99 | struct ceph_osd_data response_data; | ||
| 100 | __u8 class_len; | ||
| 101 | __u8 method_len; | ||
| 102 | __u8 argc; | ||
| 103 | } cls; | ||
| 104 | struct { | ||
| 105 | u64 cookie; | ||
| 106 | u64 ver; | ||
| 107 | u32 prot_ver; | ||
| 108 | u32 timeout; | ||
| 109 | __u8 flag; | ||
| 110 | } watch; | ||
| 111 | }; | ||
| 112 | }; | ||
| 52 | 113 | ||
| 53 | /* an in-flight request */ | 114 | /* an in-flight request */ |
| 54 | struct ceph_osd_request { | 115 | struct ceph_osd_request { |
| @@ -63,15 +124,14 @@ struct ceph_osd_request { | |||
| 63 | int r_pg_osds[CEPH_PG_MAX_SIZE]; | 124 | int r_pg_osds[CEPH_PG_MAX_SIZE]; |
| 64 | int r_num_pg_osds; | 125 | int r_num_pg_osds; |
| 65 | 126 | ||
| 66 | struct ceph_connection *r_con_filling_msg; | ||
| 67 | |||
| 68 | struct ceph_msg *r_request, *r_reply; | 127 | struct ceph_msg *r_request, *r_reply; |
| 69 | int r_flags; /* any additional flags for the osd */ | 128 | int r_flags; /* any additional flags for the osd */ |
| 70 | u32 r_sent; /* >0 if r_request is sending/sent */ | 129 | u32 r_sent; /* >0 if r_request is sending/sent */ |
| 71 | int r_num_ops; | ||
| 72 | 130 | ||
| 73 | /* encoded message content */ | 131 | /* request osd ops array */ |
| 74 | struct ceph_osd_op *r_request_ops; | 132 | unsigned int r_num_ops; |
| 133 | struct ceph_osd_req_op r_ops[CEPH_OSD_MAX_OP]; | ||
| 134 | |||
| 75 | /* these are updated on each send */ | 135 | /* these are updated on each send */ |
| 76 | __le32 *r_request_osdmap_epoch; | 136 | __le32 *r_request_osdmap_epoch; |
| 77 | __le32 *r_request_flags; | 137 | __le32 *r_request_flags; |
| @@ -85,12 +145,14 @@ struct ceph_osd_request { | |||
| 85 | s32 r_reply_op_result[CEPH_OSD_MAX_OP]; | 145 | s32 r_reply_op_result[CEPH_OSD_MAX_OP]; |
| 86 | int r_got_reply; | 146 | int r_got_reply; |
| 87 | int r_linger; | 147 | int r_linger; |
| 148 | int r_completed; | ||
| 88 | 149 | ||
| 89 | struct ceph_osd_client *r_osdc; | 150 | struct ceph_osd_client *r_osdc; |
| 90 | struct kref r_kref; | 151 | struct kref r_kref; |
| 91 | bool r_mempool; | 152 | bool r_mempool; |
| 92 | struct completion r_completion, r_safe_completion; | 153 | struct completion r_completion, r_safe_completion; |
| 93 | ceph_osdc_callback_t r_callback, r_safe_callback; | 154 | ceph_osdc_callback_t r_callback; |
| 155 | ceph_osdc_unsafe_callback_t r_unsafe_callback; | ||
| 94 | struct ceph_eversion r_reassert_version; | 156 | struct ceph_eversion r_reassert_version; |
| 95 | struct list_head r_unsafe_item; | 157 | struct list_head r_unsafe_item; |
| 96 | 158 | ||
| @@ -104,16 +166,6 @@ struct ceph_osd_request { | |||
| 104 | 166 | ||
| 105 | struct ceph_file_layout r_file_layout; | 167 | struct ceph_file_layout r_file_layout; |
| 106 | struct ceph_snap_context *r_snapc; /* snap context for writes */ | 168 | struct ceph_snap_context *r_snapc; /* snap context for writes */ |
| 107 | unsigned r_num_pages; /* size of page array (follows) */ | ||
| 108 | unsigned r_page_alignment; /* io offset in first page */ | ||
| 109 | struct page **r_pages; /* pages for data payload */ | ||
| 110 | int r_pages_from_pool; | ||
| 111 | int r_own_pages; /* if true, i own page list */ | ||
| 112 | #ifdef CONFIG_BLOCK | ||
| 113 | struct bio *r_bio; /* instead of pages */ | ||
| 114 | #endif | ||
| 115 | |||
| 116 | struct ceph_pagelist r_trail; /* trailing part of the data */ | ||
| 117 | }; | 169 | }; |
| 118 | 170 | ||
| 119 | struct ceph_osd_event { | 171 | struct ceph_osd_event { |
| @@ -172,48 +224,8 @@ struct ceph_osd_client { | |||
| 172 | struct workqueue_struct *notify_wq; | 224 | struct workqueue_struct *notify_wq; |
| 173 | }; | 225 | }; |
| 174 | 226 | ||
| 175 | struct ceph_osd_req_op { | 227 | extern int ceph_osdc_setup(void); |
| 176 | u16 op; /* CEPH_OSD_OP_* */ | 228 | extern void ceph_osdc_cleanup(void); |
| 177 | u32 payload_len; | ||
| 178 | union { | ||
| 179 | struct { | ||
| 180 | u64 offset, length; | ||
| 181 | u64 truncate_size; | ||
| 182 | u32 truncate_seq; | ||
| 183 | } extent; | ||
| 184 | struct { | ||
| 185 | const char *name; | ||
| 186 | const char *val; | ||
| 187 | u32 name_len; | ||
| 188 | u32 value_len; | ||
| 189 | __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */ | ||
| 190 | __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */ | ||
| 191 | } xattr; | ||
| 192 | struct { | ||
| 193 | const char *class_name; | ||
| 194 | const char *method_name; | ||
| 195 | const char *indata; | ||
| 196 | u32 indata_len; | ||
| 197 | __u8 class_len; | ||
| 198 | __u8 method_len; | ||
| 199 | __u8 argc; | ||
| 200 | } cls; | ||
| 201 | struct { | ||
| 202 | u64 cookie; | ||
| 203 | u64 count; | ||
| 204 | } pgls; | ||
| 205 | struct { | ||
| 206 | u64 snapid; | ||
| 207 | } snap; | ||
| 208 | struct { | ||
| 209 | u64 cookie; | ||
| 210 | u64 ver; | ||
| 211 | u32 prot_ver; | ||
| 212 | u32 timeout; | ||
| 213 | __u8 flag; | ||
| 214 | } watch; | ||
| 215 | }; | ||
| 216 | }; | ||
| 217 | 229 | ||
| 218 | extern int ceph_osdc_init(struct ceph_osd_client *osdc, | 230 | extern int ceph_osdc_init(struct ceph_osd_client *osdc, |
| 219 | struct ceph_client *client); | 231 | struct ceph_client *client); |
| @@ -224,16 +236,71 @@ extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc, | |||
| 224 | extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, | 236 | extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, |
| 225 | struct ceph_msg *msg); | 237 | struct ceph_msg *msg); |
| 226 | 238 | ||
| 239 | extern void osd_req_op_init(struct ceph_osd_request *osd_req, | ||
| 240 | unsigned int which, u16 opcode); | ||
| 241 | |||
| 242 | extern void osd_req_op_raw_data_in_pages(struct ceph_osd_request *, | ||
| 243 | unsigned int which, | ||
| 244 | struct page **pages, u64 length, | ||
| 245 | u32 alignment, bool pages_from_pool, | ||
| 246 | bool own_pages); | ||
| 247 | |||
| 248 | extern void osd_req_op_extent_init(struct ceph_osd_request *osd_req, | ||
| 249 | unsigned int which, u16 opcode, | ||
| 250 | u64 offset, u64 length, | ||
| 251 | u64 truncate_size, u32 truncate_seq); | ||
| 252 | extern void osd_req_op_extent_update(struct ceph_osd_request *osd_req, | ||
| 253 | unsigned int which, u64 length); | ||
| 254 | |||
| 255 | extern struct ceph_osd_data *osd_req_op_extent_osd_data( | ||
| 256 | struct ceph_osd_request *osd_req, | ||
| 257 | unsigned int which); | ||
| 258 | extern struct ceph_osd_data *osd_req_op_cls_response_data( | ||
| 259 | struct ceph_osd_request *osd_req, | ||
| 260 | unsigned int which); | ||
| 261 | |||
| 262 | extern void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *, | ||
| 263 | unsigned int which, | ||
| 264 | struct page **pages, u64 length, | ||
| 265 | u32 alignment, bool pages_from_pool, | ||
| 266 | bool own_pages); | ||
| 267 | extern void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *, | ||
| 268 | unsigned int which, | ||
| 269 | struct ceph_pagelist *pagelist); | ||
| 270 | #ifdef CONFIG_BLOCK | ||
| 271 | extern void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *, | ||
| 272 | unsigned int which, | ||
| 273 | struct bio *bio, size_t bio_length); | ||
| 274 | #endif /* CONFIG_BLOCK */ | ||
| 275 | |||
| 276 | extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *, | ||
| 277 | unsigned int which, | ||
| 278 | struct ceph_pagelist *pagelist); | ||
| 279 | extern void osd_req_op_cls_request_data_pages(struct ceph_osd_request *, | ||
| 280 | unsigned int which, | ||
| 281 | struct page **pages, u64 length, | ||
| 282 | u32 alignment, bool pages_from_pool, | ||
| 283 | bool own_pages); | ||
| 284 | extern void osd_req_op_cls_response_data_pages(struct ceph_osd_request *, | ||
| 285 | unsigned int which, | ||
| 286 | struct page **pages, u64 length, | ||
| 287 | u32 alignment, bool pages_from_pool, | ||
| 288 | bool own_pages); | ||
| 289 | |||
| 290 | extern void osd_req_op_cls_init(struct ceph_osd_request *osd_req, | ||
| 291 | unsigned int which, u16 opcode, | ||
| 292 | const char *class, const char *method); | ||
| 293 | extern void osd_req_op_watch_init(struct ceph_osd_request *osd_req, | ||
| 294 | unsigned int which, u16 opcode, | ||
| 295 | u64 cookie, u64 version, int flag); | ||
| 296 | |||
| 227 | extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, | 297 | extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, |
| 228 | struct ceph_snap_context *snapc, | 298 | struct ceph_snap_context *snapc, |
| 229 | unsigned int num_op, | 299 | unsigned int num_ops, |
| 230 | bool use_mempool, | 300 | bool use_mempool, |
| 231 | gfp_t gfp_flags); | 301 | gfp_t gfp_flags); |
| 232 | 302 | ||
| 233 | extern void ceph_osdc_build_request(struct ceph_osd_request *req, | 303 | extern void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off, |
| 234 | u64 off, u64 len, | ||
| 235 | unsigned int num_op, | ||
| 236 | struct ceph_osd_req_op *src_ops, | ||
| 237 | struct ceph_snap_context *snapc, | 304 | struct ceph_snap_context *snapc, |
| 238 | u64 snap_id, | 305 | u64 snap_id, |
| 239 | struct timespec *mtime); | 306 | struct timespec *mtime); |
| @@ -241,12 +308,11 @@ extern void ceph_osdc_build_request(struct ceph_osd_request *req, | |||
| 241 | extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, | 308 | extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, |
| 242 | struct ceph_file_layout *layout, | 309 | struct ceph_file_layout *layout, |
| 243 | struct ceph_vino vino, | 310 | struct ceph_vino vino, |
| 244 | u64 offset, u64 *len, int op, int flags, | 311 | u64 offset, u64 *len, |
| 312 | int num_ops, int opcode, int flags, | ||
| 245 | struct ceph_snap_context *snapc, | 313 | struct ceph_snap_context *snapc, |
| 246 | int do_sync, u32 truncate_seq, | 314 | u32 truncate_seq, u64 truncate_size, |
| 247 | u64 truncate_size, | 315 | bool use_mempool); |
| 248 | struct timespec *mtime, | ||
| 249 | bool use_mempool, int page_align); | ||
| 250 | 316 | ||
| 251 | extern void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc, | 317 | extern void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc, |
| 252 | struct ceph_osd_request *req); | 318 | struct ceph_osd_request *req); |
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index c819190d1642..d05cc4451af6 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | 3 | ||
| 4 | #include <linux/rbtree.h> | 4 | #include <linux/rbtree.h> |
| 5 | #include <linux/ceph/types.h> | 5 | #include <linux/ceph/types.h> |
| 6 | #include <linux/ceph/decode.h> | ||
| 6 | #include <linux/ceph/ceph_fs.h> | 7 | #include <linux/ceph/ceph_fs.h> |
| 7 | #include <linux/crush/crush.h> | 8 | #include <linux/crush/crush.h> |
| 8 | 9 | ||
| @@ -119,6 +120,29 @@ static inline struct ceph_entity_addr *ceph_osd_addr(struct ceph_osdmap *map, | |||
| 119 | return &map->osd_addr[osd]; | 120 | return &map->osd_addr[osd]; |
| 120 | } | 121 | } |
| 121 | 122 | ||
| 123 | static inline int ceph_decode_pgid(void **p, void *end, struct ceph_pg *pgid) | ||
| 124 | { | ||
| 125 | __u8 version; | ||
| 126 | |||
| 127 | if (!ceph_has_room(p, end, 1 + 8 + 4 + 4)) { | ||
| 128 | pr_warning("incomplete pg encoding"); | ||
| 129 | |||
| 130 | return -EINVAL; | ||
| 131 | } | ||
| 132 | version = ceph_decode_8(p); | ||
| 133 | if (version > 1) { | ||
| 134 | pr_warning("do not understand pg encoding %d > 1", | ||
| 135 | (int)version); | ||
| 136 | return -EINVAL; | ||
| 137 | } | ||
| 138 | |||
| 139 | pgid->pool = ceph_decode_64(p); | ||
| 140 | pgid->seed = ceph_decode_32(p); | ||
| 141 | *p += 4; /* skip deprecated preferred value */ | ||
| 142 | |||
| 143 | return 0; | ||
| 144 | } | ||
| 145 | |||
| 122 | extern struct ceph_osdmap *osdmap_decode(void **p, void *end); | 146 | extern struct ceph_osdmap *osdmap_decode(void **p, void *end); |
| 123 | extern struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, | 147 | extern struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, |
| 124 | struct ceph_osdmap *map, | 148 | struct ceph_osdmap *map, |
| @@ -131,10 +155,8 @@ extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, | |||
| 131 | u64 *bno, u64 *oxoff, u64 *oxlen); | 155 | u64 *bno, u64 *oxoff, u64 *oxlen); |
| 132 | 156 | ||
| 133 | /* calculate mapping of object to a placement group */ | 157 | /* calculate mapping of object to a placement group */ |
| 134 | extern int ceph_calc_object_layout(struct ceph_pg *pg, | 158 | extern int ceph_calc_ceph_pg(struct ceph_pg *pg, const char *oid, |
| 135 | const char *oid, | 159 | struct ceph_osdmap *osdmap, uint64_t pool); |
| 136 | struct ceph_file_layout *fl, | ||
| 137 | struct ceph_osdmap *osdmap); | ||
| 138 | extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, | 160 | extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, |
| 139 | struct ceph_pg pgid, | 161 | struct ceph_pg pgid, |
| 140 | int *acting); | 162 | int *acting); |
