summaryrefslogtreecommitdiffstats
path: root/include/linux/ceph
diff options
context:
space:
mode:
author Ilya Dryomov <idryomov@gmail.com> 2016-05-25 19:15:02 -0400
committer Ilya Dryomov <idryomov@gmail.com> 2016-05-25 19:15:02 -0400
commit 922dab6134178cae317ae00de86376cba59f3147 (patch)
tree a7047a5950b6a8505cc1e6852e4532656064fede /include/linux/ceph
parent c525f03601f52c83ded046624138f2a45e0ba56c (diff)
libceph, rbd: ceph_osd_linger_request, watch/notify v2
This adds support for and switches rbd to a new, more reliable version of the watch/notify protocol. As with the OSD client update, this is mostly about getting the right structures linked into the right places so that reconnects are properly sent when needed. watch/notify v2 also requires sending regular pings to the OSDs - send_linger_ping(). A major change from the old watch/notify implementation is the introduction of ceph_osd_linger_request - linger requests no longer piggyback on ceph_osd_request. ceph_osd_event has been merged into ceph_osd_linger_request. All the details are now hidden within libceph; the interface consists of a simple pair of watch/unwatch functions and ceph_osdc_notify_ack(). ceph_osdc_watch() does return ceph_osd_linger_request, but only to keep the lifetime management simple. ceph_osdc_notify_ack() accepts an optional data payload, which is relayed back to the notifier. Portions of this patch are loosely based on work by Douglas Fuller <dfuller@redhat.com> and Mike Christie <michaelc@cs.wisc.edu>. Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Diffstat (limited to 'include/linux/ceph')
-rw-r--r-- include/linux/ceph/ceph_fs.h 5
-rw-r--r-- include/linux/ceph/osd_client.h 97
-rw-r--r-- include/linux/ceph/rados.h 17
3 files changed, 75 insertions, 44 deletions
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index 37f28bf55ce4..3b911ff889dd 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -153,8 +153,9 @@ struct ceph_dir_layout {
153 153
154/* watch-notify operations */ 154/* watch-notify operations */
155enum { 155enum {
156 WATCH_NOTIFY = 1, /* notifying watcher */ 156 CEPH_WATCH_EVENT_NOTIFY = 1, /* notifying watcher */
157 WATCH_NOTIFY_COMPLETE = 2, /* notifier notified when done */ 157 CEPH_WATCH_EVENT_NOTIFY_COMPLETE = 2, /* notifier notified when done */
158 CEPH_WATCH_EVENT_DISCONNECT = 3, /* we were disconnected */
158}; 159};
159 160
160 161
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 342f22f1f040..cd2dcb8939de 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -34,7 +34,7 @@ struct ceph_osd {
34 struct rb_node o_node; 34 struct rb_node o_node;
35 struct ceph_connection o_con; 35 struct ceph_connection o_con;
36 struct rb_root o_requests; 36 struct rb_root o_requests;
37 struct list_head o_linger_requests; 37 struct rb_root o_linger_requests;
38 struct list_head o_osd_lru; 38 struct list_head o_osd_lru;
39 struct ceph_auth_handshake o_auth; 39 struct ceph_auth_handshake o_auth;
40 unsigned long lru_ttl; 40 unsigned long lru_ttl;
@@ -108,12 +108,13 @@ struct ceph_osd_req_op {
108 } cls; 108 } cls;
109 struct { 109 struct {
110 u64 cookie; 110 u64 cookie;
111 u64 ver; 111 __u8 op; /* CEPH_OSD_WATCH_OP_ */
112 u32 prot_ver; 112 u32 gen;
113 u32 timeout;
114 __u8 flag;
115 } watch; 113 } watch;
116 struct { 114 struct {
115 struct ceph_osd_data request_data;
116 } notify_ack;
117 struct {
117 u64 expected_object_size; 118 u64 expected_object_size;
118 u64 expected_write_size; 119 u64 expected_write_size;
119 } alloc_hint; 120 } alloc_hint;
@@ -145,8 +146,6 @@ struct ceph_osd_request_target {
145struct ceph_osd_request { 146struct ceph_osd_request {
146 u64 r_tid; /* unique for this client */ 147 u64 r_tid; /* unique for this client */
147 struct rb_node r_node; 148 struct rb_node r_node;
148 struct list_head r_linger_item;
149 struct list_head r_linger_osd_item;
150 struct ceph_osd *r_osd; 149 struct ceph_osd *r_osd;
151 150
152 struct ceph_osd_request_target r_t; 151 struct ceph_osd_request_target r_t;
@@ -162,7 +161,6 @@ struct ceph_osd_request {
162 161
163 int r_result; 162 int r_result;
164 bool r_got_reply; 163 bool r_got_reply;
165 int r_linger;
166 164
167 struct ceph_osd_client *r_osdc; 165 struct ceph_osd_client *r_osdc;
168 struct kref r_kref; 166 struct kref r_kref;
@@ -181,6 +179,7 @@ struct ceph_osd_request {
181 struct ceph_snap_context *r_snapc; /* for writes */ 179 struct ceph_snap_context *r_snapc; /* for writes */
182 struct timespec r_mtime; /* ditto */ 180 struct timespec r_mtime; /* ditto */
183 u64 r_data_offset; /* ditto */ 181 u64 r_data_offset; /* ditto */
182 bool r_linger; /* don't resend on failure */
184 183
185 /* internal */ 184 /* internal */
186 unsigned long r_stamp; /* jiffies, send or check time */ 185 unsigned long r_stamp; /* jiffies, send or check time */
@@ -195,23 +194,40 @@ struct ceph_request_redirect {
195 struct ceph_object_locator oloc; 194 struct ceph_object_locator oloc;
196}; 195};
197 196
198struct ceph_osd_event { 197typedef void (*rados_watchcb2_t)(void *arg, u64 notify_id, u64 cookie,
199 u64 cookie; 198 u64 notifier_id, void *data, size_t data_len);
200 int one_shot; 199typedef void (*rados_watcherrcb_t)(void *arg, u64 cookie, int err);
200
201struct ceph_osd_linger_request {
201 struct ceph_osd_client *osdc; 202 struct ceph_osd_client *osdc;
202 void (*cb)(u64, u64, u8, void *); 203 u64 linger_id;
203 void *data; 204 bool committed;
204 struct rb_node node; 205
205 struct list_head osd_node; 206 struct ceph_osd *osd;
207 struct ceph_osd_request *reg_req;
208 struct ceph_osd_request *ping_req;
209 unsigned long ping_sent;
210
211 struct ceph_osd_request_target t;
212 u32 last_force_resend;
213
214 struct timespec mtime;
215
206 struct kref kref; 216 struct kref kref;
207}; 217 struct mutex lock;
218 struct rb_node node; /* osd */
219 struct rb_node osdc_node; /* osdc */
220 struct list_head scan_item;
221
222 struct completion reg_commit_wait;
223 int reg_commit_error;
224 int last_error;
225
226 u32 register_gen;
208 227
209struct ceph_osd_event_work { 228 rados_watchcb2_t wcb;
210 struct work_struct work; 229 rados_watcherrcb_t errcb;
211 struct ceph_osd_event *event; 230 void *data;
212 u64 ver;
213 u64 notify_id;
214 u8 opcode;
215}; 231};
216 232
217struct ceph_osd_client { 233struct ceph_osd_client {
@@ -223,9 +239,10 @@ struct ceph_osd_client {
223 struct rb_root osds; /* osds */ 239 struct rb_root osds; /* osds */
224 struct list_head osd_lru; /* idle osds */ 240 struct list_head osd_lru; /* idle osds */
225 spinlock_t osd_lru_lock; 241 spinlock_t osd_lru_lock;
226 struct list_head req_linger; /* lingering requests */
227 struct ceph_osd homeless_osd; 242 struct ceph_osd homeless_osd;
228 atomic64_t last_tid; /* tid of last request */ 243 atomic64_t last_tid; /* tid of last request */
244 u64 last_linger_id;
245 struct rb_root linger_requests; /* lingering requests */
229 atomic_t num_requests; 246 atomic_t num_requests;
230 atomic_t num_homeless; 247 atomic_t num_homeless;
231 struct delayed_work timeout_work; 248 struct delayed_work timeout_work;
@@ -239,10 +256,6 @@ struct ceph_osd_client {
239 struct ceph_msgpool msgpool_op; 256 struct ceph_msgpool msgpool_op;
240 struct ceph_msgpool msgpool_op_reply; 257 struct ceph_msgpool msgpool_op_reply;
241 258
242 spinlock_t event_lock;
243 struct rb_root event_tree;
244 u64 event_count;
245
246 struct workqueue_struct *notify_wq; 259 struct workqueue_struct *notify_wq;
247}; 260};
248 261
@@ -314,9 +327,6 @@ extern void osd_req_op_cls_init(struct ceph_osd_request *osd_req,
314extern int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which, 327extern int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which,
315 u16 opcode, const char *name, const void *value, 328 u16 opcode, const char *name, const void *value,
316 size_t size, u8 cmp_op, u8 cmp_mode); 329 size_t size, u8 cmp_op, u8 cmp_mode);
317extern void osd_req_op_watch_init(struct ceph_osd_request *osd_req,
318 unsigned int which, u16 opcode,
319 u64 cookie, u64 version, int flag);
320extern void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req, 330extern void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req,
321 unsigned int which, 331 unsigned int which,
322 u64 expected_object_size, 332 u64 expected_object_size,
@@ -339,9 +349,6 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
339 u32 truncate_seq, u64 truncate_size, 349 u32 truncate_seq, u64 truncate_size,
340 bool use_mempool); 350 bool use_mempool);
341 351
342extern void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc,
343 struct ceph_osd_request *req);
344
345extern void ceph_osdc_get_request(struct ceph_osd_request *req); 352extern void ceph_osdc_get_request(struct ceph_osd_request *req);
346extern void ceph_osdc_put_request(struct ceph_osd_request *req); 353extern void ceph_osdc_put_request(struct ceph_osd_request *req);
347 354
@@ -372,11 +379,23 @@ extern int ceph_osdc_writepages(struct ceph_osd_client *osdc,
372 struct timespec *mtime, 379 struct timespec *mtime,
373 struct page **pages, int nr_pages); 380 struct page **pages, int nr_pages);
374 381
375/* watch/notify events */ 382/* watch/notify */
376extern int ceph_osdc_create_event(struct ceph_osd_client *osdc, 383struct ceph_osd_linger_request *
377 void (*event_cb)(u64, u64, u8, void *), 384ceph_osdc_watch(struct ceph_osd_client *osdc,
378 void *data, struct ceph_osd_event **pevent); 385 struct ceph_object_id *oid,
379extern void ceph_osdc_cancel_event(struct ceph_osd_event *event); 386 struct ceph_object_locator *oloc,
380extern void ceph_osdc_put_event(struct ceph_osd_event *event); 387 rados_watchcb2_t wcb,
388 rados_watcherrcb_t errcb,
389 void *data);
390int ceph_osdc_unwatch(struct ceph_osd_client *osdc,
391 struct ceph_osd_linger_request *lreq);
392
393int ceph_osdc_notify_ack(struct ceph_osd_client *osdc,
394 struct ceph_object_id *oid,
395 struct ceph_object_locator *oloc,
396 u64 notify_id,
397 u64 cookie,
398 void *payload,
399 size_t payload_len);
381#endif 400#endif
382 401
diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h
index 28740a58f32c..204c8c944703 100644
--- a/include/linux/ceph/rados.h
+++ b/include/linux/ceph/rados.h
@@ -427,7 +427,17 @@ enum {
427 CEPH_OSD_CMPXATTR_MODE_U64 = 2 427 CEPH_OSD_CMPXATTR_MODE_U64 = 2
428}; 428};
429 429
430#define RADOS_NOTIFY_VER 1 430enum {
431 CEPH_OSD_WATCH_OP_UNWATCH = 0,
432 CEPH_OSD_WATCH_OP_LEGACY_WATCH = 1,
433 /* note: use only ODD ids to prevent pre-giant code from
434 interpreting the op as UNWATCH */
435 CEPH_OSD_WATCH_OP_WATCH = 3,
436 CEPH_OSD_WATCH_OP_RECONNECT = 5,
437 CEPH_OSD_WATCH_OP_PING = 7,
438};
439
440const char *ceph_osd_watch_op_name(int o);
431 441
432/* 442/*
433 * an individual object operation. each may be accompanied by some data 443 * an individual object operation. each may be accompanied by some data
@@ -462,8 +472,9 @@ struct ceph_osd_op {
462 } __attribute__ ((packed)) snap; 472 } __attribute__ ((packed)) snap;
463 struct { 473 struct {
464 __le64 cookie; 474 __le64 cookie;
465 __le64 ver; 475 __le64 ver; /* no longer used */
466 __u8 flag; /* 0 = unwatch, 1 = watch */ 476 __u8 op; /* CEPH_OSD_WATCH_OP_* */
477 __le32 gen; /* registration generation */
467 } __attribute__ ((packed)) watch; 478 } __attribute__ ((packed)) watch;
468 struct { 479 struct {
469 __le64 offset, length; 480 __le64 offset, length;