aboutsummaryrefslogtreecommitdiffstats
path: root/net/rds/rds.h
diff options
context:
space:
mode:
author Glenn Elliott <gelliott@cs.unc.edu> 2012-03-04 19:47:13 -0500
committer Glenn Elliott <gelliott@cs.unc.edu> 2012-03-04 19:47:13 -0500
commit c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch)
tree ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /net/rds/rds.h
parent ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff)
parent 6a00f206debf8a5c8899055726ad127dbeeed098 (diff)
Merge branch 'mpi-master' into wip-k-fmlp wip-k-fmlp
Conflicts: litmus/sched_cedf.c
Diffstat (limited to 'net/rds/rds.h')
-rw-r--r-- net/rds/rds.h 193
1 files changed, 153 insertions, 40 deletions
diff --git a/net/rds/rds.h b/net/rds/rds.h
index c224b5bb3ba9..da8adac2bf06 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -50,7 +50,6 @@ rdsdebug(char *fmt, ...)
50#define RDS_FRAG_SIZE ((unsigned int)(1 << RDS_FRAG_SHIFT)) 50#define RDS_FRAG_SIZE ((unsigned int)(1 << RDS_FRAG_SHIFT))
51 51
52#define RDS_CONG_MAP_BYTES (65536 / 8) 52#define RDS_CONG_MAP_BYTES (65536 / 8)
53#define RDS_CONG_MAP_LONGS (RDS_CONG_MAP_BYTES / sizeof(unsigned long))
54#define RDS_CONG_MAP_PAGES (PAGE_ALIGN(RDS_CONG_MAP_BYTES) / PAGE_SIZE) 53#define RDS_CONG_MAP_PAGES (PAGE_ALIGN(RDS_CONG_MAP_BYTES) / PAGE_SIZE)
55#define RDS_CONG_MAP_PAGE_BITS (PAGE_SIZE * 8) 54#define RDS_CONG_MAP_PAGE_BITS (PAGE_SIZE * 8)
56 55
@@ -80,6 +79,7 @@ enum {
80/* Bits for c_flags */ 79/* Bits for c_flags */
81#define RDS_LL_SEND_FULL 0 80#define RDS_LL_SEND_FULL 0
82#define RDS_RECONNECT_PENDING 1 81#define RDS_RECONNECT_PENDING 1
82#define RDS_IN_XMIT 2
83 83
84struct rds_connection { 84struct rds_connection {
85 struct hlist_node c_hash_node; 85 struct hlist_node c_hash_node;
@@ -91,12 +91,13 @@ struct rds_connection {
91 struct rds_cong_map *c_lcong; 91 struct rds_cong_map *c_lcong;
92 struct rds_cong_map *c_fcong; 92 struct rds_cong_map *c_fcong;
93 93
94 struct mutex c_send_lock; /* protect send ring */
95 struct rds_message *c_xmit_rm; 94 struct rds_message *c_xmit_rm;
96 unsigned long c_xmit_sg; 95 unsigned long c_xmit_sg;
97 unsigned int c_xmit_hdr_off; 96 unsigned int c_xmit_hdr_off;
98 unsigned int c_xmit_data_off; 97 unsigned int c_xmit_data_off;
98 unsigned int c_xmit_atomic_sent;
99 unsigned int c_xmit_rdma_sent; 99 unsigned int c_xmit_rdma_sent;
100 unsigned int c_xmit_data_sent;
100 101
101 spinlock_t c_lock; /* protect msg queues */ 102 spinlock_t c_lock; /* protect msg queues */
102 u64 c_next_tx_seq; 103 u64 c_next_tx_seq;
@@ -116,11 +117,10 @@ struct rds_connection {
116 struct delayed_work c_conn_w; 117 struct delayed_work c_conn_w;
117 struct work_struct c_down_w; 118 struct work_struct c_down_w;
118 struct mutex c_cm_lock; /* protect conn state & cm */ 119 struct mutex c_cm_lock; /* protect conn state & cm */
120 wait_queue_head_t c_waitq;
119 121
120 struct list_head c_map_item; 122 struct list_head c_map_item;
121 unsigned long c_map_queued; 123 unsigned long c_map_queued;
122 unsigned long c_map_offset;
123 unsigned long c_map_bytes;
124 124
125 unsigned int c_unacked_packets; 125 unsigned int c_unacked_packets;
126 unsigned int c_unacked_bytes; 126 unsigned int c_unacked_bytes;
@@ -206,6 +206,48 @@ struct rds_incoming {
206 rds_rdma_cookie_t i_rdma_cookie; 206 rds_rdma_cookie_t i_rdma_cookie;
207}; 207};
208 208
209struct rds_mr {
210 struct rb_node r_rb_node;
211 atomic_t r_refcount;
212 u32 r_key;
213
214 /* A copy of the creation flags */
215 unsigned int r_use_once:1;
216 unsigned int r_invalidate:1;
217 unsigned int r_write:1;
218
219 /* This is for RDS_MR_DEAD.
220 * It would be nice & consistent to make this part of the above
221 * bit field here, but we need to use test_and_set_bit.
222 */
223 unsigned long r_state;
224 struct rds_sock *r_sock; /* back pointer to the socket that owns us */
225 struct rds_transport *r_trans;
226 void *r_trans_private;
227};
228
229/* Flags for mr->r_state */
230#define RDS_MR_DEAD 0
231
232static inline rds_rdma_cookie_t rds_rdma_make_cookie(u32 r_key, u32 offset)
233{
234 return r_key | (((u64) offset) << 32);
235}
236
237static inline u32 rds_rdma_cookie_key(rds_rdma_cookie_t cookie)
238{
239 return cookie;
240}
241
242static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie)
243{
244 return cookie >> 32;
245}
246
247/* atomic operation types */
248#define RDS_ATOMIC_TYPE_CSWP 0
249#define RDS_ATOMIC_TYPE_FADD 1
250
209/* 251/*
210 * m_sock_item and m_conn_item are on lists that are serialized under 252 * m_sock_item and m_conn_item are on lists that are serialized under
211 * conn->c_lock. m_sock_item has additional meaning in that once it is empty 253 * conn->c_lock. m_sock_item has additional meaning in that once it is empty
@@ -258,13 +300,71 @@ struct rds_message {
258 * -> rs->rs_lock 300 * -> rs->rs_lock
259 */ 301 */
260 spinlock_t m_rs_lock; 302 spinlock_t m_rs_lock;
303 wait_queue_head_t m_flush_wait;
304
261 struct rds_sock *m_rs; 305 struct rds_sock *m_rs;
262 struct rds_rdma_op *m_rdma_op; 306
307 /* cookie to send to remote, in rds header */
263 rds_rdma_cookie_t m_rdma_cookie; 308 rds_rdma_cookie_t m_rdma_cookie;
264 struct rds_mr *m_rdma_mr; 309
265 unsigned int m_nents; 310 unsigned int m_used_sgs;
266 unsigned int m_count; 311 unsigned int m_total_sgs;
267 struct scatterlist m_sg[0]; 312
313 void *m_final_op;
314
315 struct {
316 struct rm_atomic_op {
317 int op_type;
318 union {
319 struct {
320 uint64_t compare;
321 uint64_t swap;
322 uint64_t compare_mask;
323 uint64_t swap_mask;
324 } op_m_cswp;
325 struct {
326 uint64_t add;
327 uint64_t nocarry_mask;
328 } op_m_fadd;
329 };
330
331 u32 op_rkey;
332 u64 op_remote_addr;
333 unsigned int op_notify:1;
334 unsigned int op_recverr:1;
335 unsigned int op_mapped:1;
336 unsigned int op_silent:1;
337 unsigned int op_active:1;
338 struct scatterlist *op_sg;
339 struct rds_notifier *op_notifier;
340
341 struct rds_mr *op_rdma_mr;
342 } atomic;
343 struct rm_rdma_op {
344 u32 op_rkey;
345 u64 op_remote_addr;
346 unsigned int op_write:1;
347 unsigned int op_fence:1;
348 unsigned int op_notify:1;
349 unsigned int op_recverr:1;
350 unsigned int op_mapped:1;
351 unsigned int op_silent:1;
352 unsigned int op_active:1;
353 unsigned int op_bytes;
354 unsigned int op_nents;
355 unsigned int op_count;
356 struct scatterlist *op_sg;
357 struct rds_notifier *op_notifier;
358
359 struct rds_mr *op_rdma_mr;
360 } rdma;
361 struct rm_data_op {
362 unsigned int op_active:1;
363 unsigned int op_nents;
364 unsigned int op_count;
365 struct scatterlist *op_sg;
366 } data;
367 };
268}; 368};
269 369
270/* 370/*
@@ -305,10 +405,6 @@ struct rds_notifier {
305 * transport is responsible for other serialization, including 405 * transport is responsible for other serialization, including
306 * rds_recv_incoming(). This is called in process context but 406 * rds_recv_incoming(). This is called in process context but
307 * should try hard not to block. 407 * should try hard not to block.
308 *
309 * @xmit_cong_map: This asks the transport to send the local bitmap down the
310 * given connection. XXX get a better story about the bitmap
311 * flag and header.
312 */ 408 */
313 409
314#define RDS_TRANS_IB 0 410#define RDS_TRANS_IB 0
@@ -332,13 +428,11 @@ struct rds_transport {
332 void (*xmit_complete)(struct rds_connection *conn); 428 void (*xmit_complete)(struct rds_connection *conn);
333 int (*xmit)(struct rds_connection *conn, struct rds_message *rm, 429 int (*xmit)(struct rds_connection *conn, struct rds_message *rm,
334 unsigned int hdr_off, unsigned int sg, unsigned int off); 430 unsigned int hdr_off, unsigned int sg, unsigned int off);
335 int (*xmit_cong_map)(struct rds_connection *conn, 431 int (*xmit_rdma)(struct rds_connection *conn, struct rm_rdma_op *op);
336 struct rds_cong_map *map, unsigned long offset); 432 int (*xmit_atomic)(struct rds_connection *conn, struct rm_atomic_op *op);
337 int (*xmit_rdma)(struct rds_connection *conn, struct rds_rdma_op *op);
338 int (*recv)(struct rds_connection *conn); 433 int (*recv)(struct rds_connection *conn);
339 int (*inc_copy_to_user)(struct rds_incoming *inc, struct iovec *iov, 434 int (*inc_copy_to_user)(struct rds_incoming *inc, struct iovec *iov,
340 size_t size); 435 size_t size);
341 void (*inc_purge)(struct rds_incoming *inc);
342 void (*inc_free)(struct rds_incoming *inc); 436 void (*inc_free)(struct rds_incoming *inc);
343 437
344 int (*cm_handle_connect)(struct rdma_cm_id *cm_id, 438 int (*cm_handle_connect)(struct rdma_cm_id *cm_id,
@@ -367,17 +461,11 @@ struct rds_sock {
367 * bound_addr used for both incoming and outgoing, no INADDR_ANY 461 * bound_addr used for both incoming and outgoing, no INADDR_ANY
368 * support. 462 * support.
369 */ 463 */
370 struct rb_node rs_bound_node; 464 struct hlist_node rs_bound_node;
371 __be32 rs_bound_addr; 465 __be32 rs_bound_addr;
372 __be32 rs_conn_addr; 466 __be32 rs_conn_addr;
373 __be16 rs_bound_port; 467 __be16 rs_bound_port;
374 __be16 rs_conn_port; 468 __be16 rs_conn_port;
375
376 /*
377 * This is only used to communicate the transport between bind and
378 * initiating connections. All other trans use is referenced through
379 * the connection.
380 */
381 struct rds_transport *rs_transport; 469 struct rds_transport *rs_transport;
382 470
383 /* 471 /*
@@ -466,8 +554,8 @@ struct rds_statistics {
466 uint64_t s_recv_ping; 554 uint64_t s_recv_ping;
467 uint64_t s_send_queue_empty; 555 uint64_t s_send_queue_empty;
468 uint64_t s_send_queue_full; 556 uint64_t s_send_queue_full;
469 uint64_t s_send_sem_contention; 557 uint64_t s_send_lock_contention;
470 uint64_t s_send_sem_queue_raced; 558 uint64_t s_send_lock_queue_raced;
471 uint64_t s_send_immediate_retry; 559 uint64_t s_send_immediate_retry;
472 uint64_t s_send_delayed_retry; 560 uint64_t s_send_delayed_retry;
473 uint64_t s_send_drop_acked; 561 uint64_t s_send_drop_acked;
@@ -487,6 +575,7 @@ struct rds_statistics {
487}; 575};
488 576
489/* af_rds.c */ 577/* af_rds.c */
578char *rds_str_array(char **array, size_t elements, size_t index);
490void rds_sock_addref(struct rds_sock *rs); 579void rds_sock_addref(struct rds_sock *rs);
491void rds_sock_put(struct rds_sock *rs); 580void rds_sock_put(struct rds_sock *rs);
492void rds_wake_sk_sleep(struct rds_sock *rs); 581void rds_wake_sk_sleep(struct rds_sock *rs);
@@ -521,15 +610,16 @@ void rds_cong_exit(void);
521struct rds_message *rds_cong_update_alloc(struct rds_connection *conn); 610struct rds_message *rds_cong_update_alloc(struct rds_connection *conn);
522 611
523/* conn.c */ 612/* conn.c */
524int __init rds_conn_init(void); 613int rds_conn_init(void);
525void rds_conn_exit(void); 614void rds_conn_exit(void);
526struct rds_connection *rds_conn_create(__be32 laddr, __be32 faddr, 615struct rds_connection *rds_conn_create(__be32 laddr, __be32 faddr,
527 struct rds_transport *trans, gfp_t gfp); 616 struct rds_transport *trans, gfp_t gfp);
528struct rds_connection *rds_conn_create_outgoing(__be32 laddr, __be32 faddr, 617struct rds_connection *rds_conn_create_outgoing(__be32 laddr, __be32 faddr,
529 struct rds_transport *trans, gfp_t gfp); 618 struct rds_transport *trans, gfp_t gfp);
619void rds_conn_shutdown(struct rds_connection *conn);
530void rds_conn_destroy(struct rds_connection *conn); 620void rds_conn_destroy(struct rds_connection *conn);
531void rds_conn_reset(struct rds_connection *conn);
532void rds_conn_drop(struct rds_connection *conn); 621void rds_conn_drop(struct rds_connection *conn);
622void rds_conn_connect_if_down(struct rds_connection *conn);
533void rds_for_each_conn_info(struct socket *sock, unsigned int len, 623void rds_for_each_conn_info(struct socket *sock, unsigned int len,
534 struct rds_info_iterator *iter, 624 struct rds_info_iterator *iter,
535 struct rds_info_lengths *lens, 625 struct rds_info_lengths *lens,
@@ -566,7 +656,8 @@ rds_conn_connecting(struct rds_connection *conn)
566 656
567/* message.c */ 657/* message.c */
568struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp); 658struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp);
569struct rds_message *rds_message_copy_from_user(struct iovec *first_iov, 659struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents);
660int rds_message_copy_from_user(struct rds_message *rm, struct iovec *first_iov,
570 size_t total_len); 661 size_t total_len);
571struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len); 662struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len);
572void rds_message_populate_header(struct rds_header *hdr, __be16 sport, 663void rds_message_populate_header(struct rds_header *hdr, __be16 sport,
@@ -575,12 +666,9 @@ int rds_message_add_extension(struct rds_header *hdr,
575 unsigned int type, const void *data, unsigned int len); 666 unsigned int type, const void *data, unsigned int len);
576int rds_message_next_extension(struct rds_header *hdr, 667int rds_message_next_extension(struct rds_header *hdr,
577 unsigned int *pos, void *buf, unsigned int *buflen); 668 unsigned int *pos, void *buf, unsigned int *buflen);
578int rds_message_add_version_extension(struct rds_header *hdr, unsigned int version);
579int rds_message_get_version_extension(struct rds_header *hdr, unsigned int *version);
580int rds_message_add_rdma_dest_extension(struct rds_header *hdr, u32 r_key, u32 offset); 669int rds_message_add_rdma_dest_extension(struct rds_header *hdr, u32 r_key, u32 offset);
581int rds_message_inc_copy_to_user(struct rds_incoming *inc, 670int rds_message_inc_copy_to_user(struct rds_incoming *inc,
582 struct iovec *first_iov, size_t size); 671 struct iovec *first_iov, size_t size);
583void rds_message_inc_purge(struct rds_incoming *inc);
584void rds_message_inc_free(struct rds_incoming *inc); 672void rds_message_inc_free(struct rds_incoming *inc);
585void rds_message_addref(struct rds_message *rm); 673void rds_message_addref(struct rds_message *rm);
586void rds_message_put(struct rds_message *rm); 674void rds_message_put(struct rds_message *rm);
@@ -614,7 +702,6 @@ void rds_page_exit(void);
614/* recv.c */ 702/* recv.c */
615void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn, 703void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
616 __be32 saddr); 704 __be32 saddr);
617void rds_inc_addref(struct rds_incoming *inc);
618void rds_inc_put(struct rds_incoming *inc); 705void rds_inc_put(struct rds_incoming *inc);
619void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr, 706void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
620 struct rds_incoming *inc, gfp_t gfp, enum km_type km); 707 struct rds_incoming *inc, gfp_t gfp, enum km_type km);
@@ -636,14 +723,38 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest);
636typedef int (*is_acked_func)(struct rds_message *rm, uint64_t ack); 723typedef int (*is_acked_func)(struct rds_message *rm, uint64_t ack);
637void rds_send_drop_acked(struct rds_connection *conn, u64 ack, 724void rds_send_drop_acked(struct rds_connection *conn, u64 ack,
638 is_acked_func is_acked); 725 is_acked_func is_acked);
639int rds_send_acked_before(struct rds_connection *conn, u64 seq);
640void rds_send_remove_from_sock(struct list_head *messages, int status);
641int rds_send_pong(struct rds_connection *conn, __be16 dport); 726int rds_send_pong(struct rds_connection *conn, __be16 dport);
642struct rds_message *rds_send_get_message(struct rds_connection *, 727struct rds_message *rds_send_get_message(struct rds_connection *,
643 struct rds_rdma_op *); 728 struct rm_rdma_op *);
644 729
645/* rdma.c */ 730/* rdma.c */
646void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force); 731void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force);
732int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen);
733int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen);
734int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen);
735void rds_rdma_drop_keys(struct rds_sock *rs);
736int rds_rdma_extra_size(struct rds_rdma_args *args);
737int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
738 struct cmsghdr *cmsg);
739int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
740 struct cmsghdr *cmsg);
741int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
742 struct cmsghdr *cmsg);
743int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm,
744 struct cmsghdr *cmsg);
745void rds_rdma_free_op(struct rm_rdma_op *ro);
746void rds_atomic_free_op(struct rm_atomic_op *ao);
747void rds_rdma_send_complete(struct rds_message *rm, int wc_status);
748void rds_atomic_send_complete(struct rds_message *rm, int wc_status);
749int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
750 struct cmsghdr *cmsg);
751
752extern void __rds_put_mr_final(struct rds_mr *mr);
753static inline void rds_mr_put(struct rds_mr *mr)
754{
755 if (atomic_dec_and_test(&mr->r_refcount))
756 __rds_put_mr_final(mr);
757}
647 758
648/* stats.c */ 759/* stats.c */
649DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats); 760DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats);
@@ -657,14 +768,14 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats);
657 put_cpu(); \ 768 put_cpu(); \
658} while (0) 769} while (0)
659#define rds_stats_add(member, count) rds_stats_add_which(rds_stats, member, count) 770#define rds_stats_add(member, count) rds_stats_add_which(rds_stats, member, count)
660int __init rds_stats_init(void); 771int rds_stats_init(void);
661void rds_stats_exit(void); 772void rds_stats_exit(void);
662void rds_stats_info_copy(struct rds_info_iterator *iter, 773void rds_stats_info_copy(struct rds_info_iterator *iter,
663 uint64_t *values, const char *const *names, 774 uint64_t *values, const char *const *names,
664 size_t nr); 775 size_t nr);
665 776
666/* sysctl.c */ 777/* sysctl.c */
667int __init rds_sysctl_init(void); 778int rds_sysctl_init(void);
668void rds_sysctl_exit(void); 779void rds_sysctl_exit(void);
669extern unsigned long rds_sysctl_sndbuf_min; 780extern unsigned long rds_sysctl_sndbuf_min;
670extern unsigned long rds_sysctl_sndbuf_default; 781extern unsigned long rds_sysctl_sndbuf_default;
@@ -678,9 +789,10 @@ extern unsigned long rds_sysctl_trace_flags;
678extern unsigned int rds_sysctl_trace_level; 789extern unsigned int rds_sysctl_trace_level;
679 790
680/* threads.c */ 791/* threads.c */
681int __init rds_threads_init(void); 792int rds_threads_init(void);
682void rds_threads_exit(void); 793void rds_threads_exit(void);
683extern struct workqueue_struct *rds_wq; 794extern struct workqueue_struct *rds_wq;
795void rds_queue_reconnect(struct rds_connection *conn);
684void rds_connect_worker(struct work_struct *); 796void rds_connect_worker(struct work_struct *);
685void rds_shutdown_worker(struct work_struct *); 797void rds_shutdown_worker(struct work_struct *);
686void rds_send_worker(struct work_struct *); 798void rds_send_worker(struct work_struct *);
@@ -691,9 +803,10 @@ void rds_connect_complete(struct rds_connection *conn);
691int rds_trans_register(struct rds_transport *trans); 803int rds_trans_register(struct rds_transport *trans);
692void rds_trans_unregister(struct rds_transport *trans); 804void rds_trans_unregister(struct rds_transport *trans);
693struct rds_transport *rds_trans_get_preferred(__be32 addr); 805struct rds_transport *rds_trans_get_preferred(__be32 addr);
806void rds_trans_put(struct rds_transport *trans);
694unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter, 807unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter,
695 unsigned int avail); 808 unsigned int avail);
696int __init rds_trans_init(void); 809int rds_trans_init(void);
697void rds_trans_exit(void); 810void rds_trans_exit(void);
698 811
699#endif 812#endif