diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-03-28 13:01:29 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-03-28 13:01:29 -0400 |
| commit | 56b59b429b4c26e5e730bc8c3d837de9f7d0a966 (patch) | |
| tree | 191bf87e438a3985ccb7e3c5382fab8d31f94edb /net | |
| parent | 9a7259d5c8978bbeb5fdcf64b168f8470d8208a6 (diff) | |
| parent | c666601a935b94cc0f3310339411b6940de751ba (diff) | |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
Pull Ceph updates for 3.4-rc1 from Sage Weil:
"Alex has been busy. There are a range of rbd and libceph cleanups,
especially surrounding device setup and teardown, and a few critical
fixes in that code. There are more cleanups in the messenger code,
virtual xattrs, a fix for CRC calculation/checks, and lots of other
miscellaneous stuff.
There's a patch from Amon Ott to make inos behave a bit better on
32-bit boxes, some decode check fixes from Xi Wang, a network
throttling fix from Jim Schutt, and a couple of RBD fixes from Josh
Durgin.
No new functionality, just a lot of cleanup and bug fixing."
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (65 commits)
rbd: move snap_rwsem to the device, rename to header_rwsem
ceph: fix three bugs, two in ceph_vxattrcb_file_layout()
libceph: isolate kmap() call in write_partial_msg_pages()
libceph: rename "page_shift" variable to something sensible
libceph: get rid of zero_page_address
libceph: only call kernel_sendpage() via helper
libceph: use kernel_sendpage() for sending zeroes
libceph: fix inverted crc option logic
libceph: some simple changes
libceph: small refactor in write_partial_kvec()
libceph: do crc calculations outside loop
libceph: separate CRC calculation from byte swapping
libceph: use "do" in CRC-related Boolean variables
ceph: ensure Boolean options support both senses
libceph: a few small changes
libceph: make ceph_tcp_connect() return int
libceph: encapsulate some messenger cleanup code
libceph: make ceph_msgr_wq private
libceph: encapsulate connection kvec operations
libceph: move prepare_write_banner()
...
Diffstat (limited to 'net')
| -rw-r--r-- | net/ceph/ceph_common.c | 26 | ||||
| -rw-r--r-- | net/ceph/messenger.c | 456 | ||||
| -rw-r--r-- | net/ceph/osdmap.c | 3 |
3 files changed, 255 insertions, 230 deletions
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 761ad9d6cc3b..cc913193d992 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c | |||
| @@ -201,7 +201,9 @@ enum { | |||
| 201 | Opt_ip, | 201 | Opt_ip, |
| 202 | Opt_last_string, | 202 | Opt_last_string, |
| 203 | /* string args above */ | 203 | /* string args above */ |
| 204 | Opt_share, | ||
| 204 | Opt_noshare, | 205 | Opt_noshare, |
| 206 | Opt_crc, | ||
| 205 | Opt_nocrc, | 207 | Opt_nocrc, |
| 206 | }; | 208 | }; |
| 207 | 209 | ||
| @@ -217,7 +219,9 @@ static match_table_t opt_tokens = { | |||
| 217 | {Opt_key, "key=%s"}, | 219 | {Opt_key, "key=%s"}, |
| 218 | {Opt_ip, "ip=%s"}, | 220 | {Opt_ip, "ip=%s"}, |
| 219 | /* string args above */ | 221 | /* string args above */ |
| 222 | {Opt_share, "share"}, | ||
| 220 | {Opt_noshare, "noshare"}, | 223 | {Opt_noshare, "noshare"}, |
| 224 | {Opt_crc, "crc"}, | ||
| 221 | {Opt_nocrc, "nocrc"}, | 225 | {Opt_nocrc, "nocrc"}, |
| 222 | {-1, NULL} | 226 | {-1, NULL} |
| 223 | }; | 227 | }; |
| @@ -277,10 +281,11 @@ out: | |||
| 277 | return err; | 281 | return err; |
| 278 | } | 282 | } |
| 279 | 283 | ||
| 280 | int ceph_parse_options(struct ceph_options **popt, char *options, | 284 | struct ceph_options * |
| 281 | const char *dev_name, const char *dev_name_end, | 285 | ceph_parse_options(char *options, const char *dev_name, |
| 282 | int (*parse_extra_token)(char *c, void *private), | 286 | const char *dev_name_end, |
| 283 | void *private) | 287 | int (*parse_extra_token)(char *c, void *private), |
| 288 | void *private) | ||
| 284 | { | 289 | { |
| 285 | struct ceph_options *opt; | 290 | struct ceph_options *opt; |
| 286 | const char *c; | 291 | const char *c; |
| @@ -289,7 +294,7 @@ int ceph_parse_options(struct ceph_options **popt, char *options, | |||
| 289 | 294 | ||
| 290 | opt = kzalloc(sizeof(*opt), GFP_KERNEL); | 295 | opt = kzalloc(sizeof(*opt), GFP_KERNEL); |
| 291 | if (!opt) | 296 | if (!opt) |
| 292 | return err; | 297 | return ERR_PTR(-ENOMEM); |
| 293 | opt->mon_addr = kcalloc(CEPH_MAX_MON, sizeof(*opt->mon_addr), | 298 | opt->mon_addr = kcalloc(CEPH_MAX_MON, sizeof(*opt->mon_addr), |
| 294 | GFP_KERNEL); | 299 | GFP_KERNEL); |
| 295 | if (!opt->mon_addr) | 300 | if (!opt->mon_addr) |
| @@ -398,10 +403,16 @@ int ceph_parse_options(struct ceph_options **popt, char *options, | |||
| 398 | opt->mount_timeout = intval; | 403 | opt->mount_timeout = intval; |
| 399 | break; | 404 | break; |
| 400 | 405 | ||
| 406 | case Opt_share: | ||
| 407 | opt->flags &= ~CEPH_OPT_NOSHARE; | ||
| 408 | break; | ||
| 401 | case Opt_noshare: | 409 | case Opt_noshare: |
| 402 | opt->flags |= CEPH_OPT_NOSHARE; | 410 | opt->flags |= CEPH_OPT_NOSHARE; |
| 403 | break; | 411 | break; |
| 404 | 412 | ||
| 413 | case Opt_crc: | ||
| 414 | opt->flags &= ~CEPH_OPT_NOCRC; | ||
| 415 | break; | ||
| 405 | case Opt_nocrc: | 416 | case Opt_nocrc: |
| 406 | opt->flags |= CEPH_OPT_NOCRC; | 417 | opt->flags |= CEPH_OPT_NOCRC; |
| 407 | break; | 418 | break; |
| @@ -412,12 +423,11 @@ int ceph_parse_options(struct ceph_options **popt, char *options, | |||
| 412 | } | 423 | } |
| 413 | 424 | ||
| 414 | /* success */ | 425 | /* success */ |
| 415 | *popt = opt; | 426 | return opt; |
| 416 | return 0; | ||
| 417 | 427 | ||
| 418 | out: | 428 | out: |
| 419 | ceph_destroy_options(opt); | 429 | ceph_destroy_options(opt); |
| 420 | return err; | 430 | return ERR_PTR(err); |
| 421 | } | 431 | } |
| 422 | EXPORT_SYMBOL(ceph_parse_options); | 432 | EXPORT_SYMBOL(ceph_parse_options); |
| 423 | 433 | ||
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index ad5b70801f37..f0993af2ae4d 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c | |||
| @@ -38,48 +38,54 @@ static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE; | |||
| 38 | static struct lock_class_key socket_class; | 38 | static struct lock_class_key socket_class; |
| 39 | #endif | 39 | #endif |
| 40 | 40 | ||
| 41 | /* | ||
| 42 | * When skipping (ignoring) a block of input we read it into a "skip | ||
| 43 | * buffer," which is this many bytes in size. | ||
| 44 | */ | ||
| 45 | #define SKIP_BUF_SIZE 1024 | ||
| 41 | 46 | ||
| 42 | static void queue_con(struct ceph_connection *con); | 47 | static void queue_con(struct ceph_connection *con); |
| 43 | static void con_work(struct work_struct *); | 48 | static void con_work(struct work_struct *); |
| 44 | static void ceph_fault(struct ceph_connection *con); | 49 | static void ceph_fault(struct ceph_connection *con); |
| 45 | 50 | ||
| 46 | /* | 51 | /* |
| 47 | * nicely render a sockaddr as a string. | 52 | * Nicely render a sockaddr as a string. An array of formatted |
| 53 | * strings is used, to approximate reentrancy. | ||
| 48 | */ | 54 | */ |
| 49 | #define MAX_ADDR_STR 20 | 55 | #define ADDR_STR_COUNT_LOG 5 /* log2(# address strings in array) */ |
| 50 | #define MAX_ADDR_STR_LEN 60 | 56 | #define ADDR_STR_COUNT (1 << ADDR_STR_COUNT_LOG) |
| 51 | static char addr_str[MAX_ADDR_STR][MAX_ADDR_STR_LEN]; | 57 | #define ADDR_STR_COUNT_MASK (ADDR_STR_COUNT - 1) |
| 52 | static DEFINE_SPINLOCK(addr_str_lock); | 58 | #define MAX_ADDR_STR_LEN 64 /* 54 is enough */ |
| 53 | static int last_addr_str; | 59 | |
| 60 | static char addr_str[ADDR_STR_COUNT][MAX_ADDR_STR_LEN]; | ||
| 61 | static atomic_t addr_str_seq = ATOMIC_INIT(0); | ||
| 62 | |||
| 63 | static struct page *zero_page; /* used in certain error cases */ | ||
| 54 | 64 | ||
| 55 | const char *ceph_pr_addr(const struct sockaddr_storage *ss) | 65 | const char *ceph_pr_addr(const struct sockaddr_storage *ss) |
| 56 | { | 66 | { |
| 57 | int i; | 67 | int i; |
| 58 | char *s; | 68 | char *s; |
| 59 | struct sockaddr_in *in4 = (void *)ss; | 69 | struct sockaddr_in *in4 = (struct sockaddr_in *) ss; |
| 60 | struct sockaddr_in6 *in6 = (void *)ss; | 70 | struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) ss; |
| 61 | 71 | ||
| 62 | spin_lock(&addr_str_lock); | 72 | i = atomic_inc_return(&addr_str_seq) & ADDR_STR_COUNT_MASK; |
| 63 | i = last_addr_str++; | ||
| 64 | if (last_addr_str == MAX_ADDR_STR) | ||
| 65 | last_addr_str = 0; | ||
| 66 | spin_unlock(&addr_str_lock); | ||
| 67 | s = addr_str[i]; | 73 | s = addr_str[i]; |
| 68 | 74 | ||
| 69 | switch (ss->ss_family) { | 75 | switch (ss->ss_family) { |
| 70 | case AF_INET: | 76 | case AF_INET: |
| 71 | snprintf(s, MAX_ADDR_STR_LEN, "%pI4:%u", &in4->sin_addr, | 77 | snprintf(s, MAX_ADDR_STR_LEN, "%pI4:%hu", &in4->sin_addr, |
| 72 | (unsigned int)ntohs(in4->sin_port)); | 78 | ntohs(in4->sin_port)); |
| 73 | break; | 79 | break; |
| 74 | 80 | ||
| 75 | case AF_INET6: | 81 | case AF_INET6: |
| 76 | snprintf(s, MAX_ADDR_STR_LEN, "[%pI6c]:%u", &in6->sin6_addr, | 82 | snprintf(s, MAX_ADDR_STR_LEN, "[%pI6c]:%hu", &in6->sin6_addr, |
| 77 | (unsigned int)ntohs(in6->sin6_port)); | 83 | ntohs(in6->sin6_port)); |
| 78 | break; | 84 | break; |
| 79 | 85 | ||
| 80 | default: | 86 | default: |
| 81 | snprintf(s, MAX_ADDR_STR_LEN, "(unknown sockaddr family %d)", | 87 | snprintf(s, MAX_ADDR_STR_LEN, "(unknown sockaddr family %hu)", |
| 82 | (int)ss->ss_family); | 88 | ss->ss_family); |
| 83 | } | 89 | } |
| 84 | 90 | ||
| 85 | return s; | 91 | return s; |
| @@ -95,22 +101,43 @@ static void encode_my_addr(struct ceph_messenger *msgr) | |||
| 95 | /* | 101 | /* |
| 96 | * work queue for all reading and writing to/from the socket. | 102 | * work queue for all reading and writing to/from the socket. |
| 97 | */ | 103 | */ |
| 98 | struct workqueue_struct *ceph_msgr_wq; | 104 | static struct workqueue_struct *ceph_msgr_wq; |
| 105 | |||
| 106 | void _ceph_msgr_exit(void) | ||
| 107 | { | ||
| 108 | if (ceph_msgr_wq) { | ||
| 109 | destroy_workqueue(ceph_msgr_wq); | ||
| 110 | ceph_msgr_wq = NULL; | ||
| 111 | } | ||
| 112 | |||
| 113 | BUG_ON(zero_page == NULL); | ||
| 114 | kunmap(zero_page); | ||
| 115 | page_cache_release(zero_page); | ||
| 116 | zero_page = NULL; | ||
| 117 | } | ||
| 99 | 118 | ||
| 100 | int ceph_msgr_init(void) | 119 | int ceph_msgr_init(void) |
| 101 | { | 120 | { |
| 121 | BUG_ON(zero_page != NULL); | ||
| 122 | zero_page = ZERO_PAGE(0); | ||
| 123 | page_cache_get(zero_page); | ||
| 124 | |||
| 102 | ceph_msgr_wq = alloc_workqueue("ceph-msgr", WQ_NON_REENTRANT, 0); | 125 | ceph_msgr_wq = alloc_workqueue("ceph-msgr", WQ_NON_REENTRANT, 0); |
| 103 | if (!ceph_msgr_wq) { | 126 | if (ceph_msgr_wq) |
| 104 | pr_err("msgr_init failed to create workqueue\n"); | 127 | return 0; |
| 105 | return -ENOMEM; | 128 | |
| 106 | } | 129 | pr_err("msgr_init failed to create workqueue\n"); |
| 107 | return 0; | 130 | _ceph_msgr_exit(); |
| 131 | |||
| 132 | return -ENOMEM; | ||
| 108 | } | 133 | } |
| 109 | EXPORT_SYMBOL(ceph_msgr_init); | 134 | EXPORT_SYMBOL(ceph_msgr_init); |
| 110 | 135 | ||
| 111 | void ceph_msgr_exit(void) | 136 | void ceph_msgr_exit(void) |
| 112 | { | 137 | { |
| 113 | destroy_workqueue(ceph_msgr_wq); | 138 | BUG_ON(ceph_msgr_wq == NULL); |
| 139 | |||
| 140 | _ceph_msgr_exit(); | ||
| 114 | } | 141 | } |
| 115 | EXPORT_SYMBOL(ceph_msgr_exit); | 142 | EXPORT_SYMBOL(ceph_msgr_exit); |
| 116 | 143 | ||
| @@ -128,8 +155,8 @@ EXPORT_SYMBOL(ceph_msgr_flush); | |||
| 128 | /* data available on socket, or listen socket received a connect */ | 155 | /* data available on socket, or listen socket received a connect */ |
| 129 | static void ceph_data_ready(struct sock *sk, int count_unused) | 156 | static void ceph_data_ready(struct sock *sk, int count_unused) |
| 130 | { | 157 | { |
| 131 | struct ceph_connection *con = | 158 | struct ceph_connection *con = sk->sk_user_data; |
| 132 | (struct ceph_connection *)sk->sk_user_data; | 159 | |
| 133 | if (sk->sk_state != TCP_CLOSE_WAIT) { | 160 | if (sk->sk_state != TCP_CLOSE_WAIT) { |
| 134 | dout("ceph_data_ready on %p state = %lu, queueing work\n", | 161 | dout("ceph_data_ready on %p state = %lu, queueing work\n", |
| 135 | con, con->state); | 162 | con, con->state); |
| @@ -140,26 +167,30 @@ static void ceph_data_ready(struct sock *sk, int count_unused) | |||
| 140 | /* socket has buffer space for writing */ | 167 | /* socket has buffer space for writing */ |
| 141 | static void ceph_write_space(struct sock *sk) | 168 | static void ceph_write_space(struct sock *sk) |
| 142 | { | 169 | { |
| 143 | struct ceph_connection *con = | 170 | struct ceph_connection *con = sk->sk_user_data; |
| 144 | (struct ceph_connection *)sk->sk_user_data; | ||
| 145 | 171 | ||
| 146 | /* only queue to workqueue if there is data we want to write. */ | 172 | /* only queue to workqueue if there is data we want to write, |
| 173 | * and there is sufficient space in the socket buffer to accept | ||
| 174 | * more data. clear SOCK_NOSPACE so that ceph_write_space() | ||
| 175 | * doesn't get called again until try_write() fills the socket | ||
| 176 | * buffer. See net/ipv4/tcp_input.c:tcp_check_space() | ||
| 177 | * and net/core/stream.c:sk_stream_write_space(). | ||
| 178 | */ | ||
| 147 | if (test_bit(WRITE_PENDING, &con->state)) { | 179 | if (test_bit(WRITE_PENDING, &con->state)) { |
| 148 | dout("ceph_write_space %p queueing write work\n", con); | 180 | if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) { |
| 149 | queue_con(con); | 181 | dout("ceph_write_space %p queueing write work\n", con); |
| 182 | clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); | ||
| 183 | queue_con(con); | ||
| 184 | } | ||
| 150 | } else { | 185 | } else { |
| 151 | dout("ceph_write_space %p nothing to write\n", con); | 186 | dout("ceph_write_space %p nothing to write\n", con); |
| 152 | } | 187 | } |
| 153 | |||
| 154 | /* since we have our own write_space, clear the SOCK_NOSPACE flag */ | ||
| 155 | clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); | ||
| 156 | } | 188 | } |
| 157 | 189 | ||
| 158 | /* socket's state has changed */ | 190 | /* socket's state has changed */ |
| 159 | static void ceph_state_change(struct sock *sk) | 191 | static void ceph_state_change(struct sock *sk) |
| 160 | { | 192 | { |
| 161 | struct ceph_connection *con = | 193 | struct ceph_connection *con = sk->sk_user_data; |
| 162 | (struct ceph_connection *)sk->sk_user_data; | ||
| 163 | 194 | ||
| 164 | dout("ceph_state_change %p state = %lu sk_state = %u\n", | 195 | dout("ceph_state_change %p state = %lu sk_state = %u\n", |
| 165 | con, con->state, sk->sk_state); | 196 | con, con->state, sk->sk_state); |
| @@ -184,6 +215,8 @@ static void ceph_state_change(struct sock *sk) | |||
| 184 | dout("ceph_state_change TCP_ESTABLISHED\n"); | 215 | dout("ceph_state_change TCP_ESTABLISHED\n"); |
| 185 | queue_con(con); | 216 | queue_con(con); |
| 186 | break; | 217 | break; |
| 218 | default: /* Everything else is uninteresting */ | ||
| 219 | break; | ||
| 187 | } | 220 | } |
| 188 | } | 221 | } |
| 189 | 222 | ||
| @@ -194,7 +227,7 @@ static void set_sock_callbacks(struct socket *sock, | |||
| 194 | struct ceph_connection *con) | 227 | struct ceph_connection *con) |
| 195 | { | 228 | { |
| 196 | struct sock *sk = sock->sk; | 229 | struct sock *sk = sock->sk; |
| 197 | sk->sk_user_data = (void *)con; | 230 | sk->sk_user_data = con; |
| 198 | sk->sk_data_ready = ceph_data_ready; | 231 | sk->sk_data_ready = ceph_data_ready; |
| 199 | sk->sk_write_space = ceph_write_space; | 232 | sk->sk_write_space = ceph_write_space; |
| 200 | sk->sk_state_change = ceph_state_change; | 233 | sk->sk_state_change = ceph_state_change; |
| @@ -208,7 +241,7 @@ static void set_sock_callbacks(struct socket *sock, | |||
| 208 | /* | 241 | /* |
| 209 | * initiate connection to a remote socket. | 242 | * initiate connection to a remote socket. |
| 210 | */ | 243 | */ |
| 211 | static struct socket *ceph_tcp_connect(struct ceph_connection *con) | 244 | static int ceph_tcp_connect(struct ceph_connection *con) |
| 212 | { | 245 | { |
| 213 | struct sockaddr_storage *paddr = &con->peer_addr.in_addr; | 246 | struct sockaddr_storage *paddr = &con->peer_addr.in_addr; |
| 214 | struct socket *sock; | 247 | struct socket *sock; |
| @@ -218,8 +251,7 @@ static struct socket *ceph_tcp_connect(struct ceph_connection *con) | |||
| 218 | ret = sock_create_kern(con->peer_addr.in_addr.ss_family, SOCK_STREAM, | 251 | ret = sock_create_kern(con->peer_addr.in_addr.ss_family, SOCK_STREAM, |
| 219 | IPPROTO_TCP, &sock); | 252 | IPPROTO_TCP, &sock); |
| 220 | if (ret) | 253 | if (ret) |
| 221 | return ERR_PTR(ret); | 254 | return ret; |
| 222 | con->sock = sock; | ||
| 223 | sock->sk->sk_allocation = GFP_NOFS; | 255 | sock->sk->sk_allocation = GFP_NOFS; |
| 224 | 256 | ||
| 225 | #ifdef CONFIG_LOCKDEP | 257 | #ifdef CONFIG_LOCKDEP |
| @@ -236,19 +268,17 @@ static struct socket *ceph_tcp_connect(struct ceph_connection *con) | |||
| 236 | dout("connect %s EINPROGRESS sk_state = %u\n", | 268 | dout("connect %s EINPROGRESS sk_state = %u\n", |
| 237 | ceph_pr_addr(&con->peer_addr.in_addr), | 269 | ceph_pr_addr(&con->peer_addr.in_addr), |
| 238 | sock->sk->sk_state); | 270 | sock->sk->sk_state); |
| 239 | ret = 0; | 271 | } else if (ret < 0) { |
| 240 | } | ||
| 241 | if (ret < 0) { | ||
| 242 | pr_err("connect %s error %d\n", | 272 | pr_err("connect %s error %d\n", |
| 243 | ceph_pr_addr(&con->peer_addr.in_addr), ret); | 273 | ceph_pr_addr(&con->peer_addr.in_addr), ret); |
| 244 | sock_release(sock); | 274 | sock_release(sock); |
| 245 | con->sock = NULL; | ||
| 246 | con->error_msg = "connect error"; | 275 | con->error_msg = "connect error"; |
| 276 | |||
| 277 | return ret; | ||
| 247 | } | 278 | } |
| 279 | con->sock = sock; | ||
| 248 | 280 | ||
| 249 | if (ret < 0) | 281 | return 0; |
| 250 | return ERR_PTR(ret); | ||
| 251 | return sock; | ||
| 252 | } | 282 | } |
| 253 | 283 | ||
| 254 | static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len) | 284 | static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len) |
| @@ -284,6 +314,19 @@ static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov, | |||
| 284 | return r; | 314 | return r; |
| 285 | } | 315 | } |
| 286 | 316 | ||
| 317 | static int ceph_tcp_sendpage(struct socket *sock, struct page *page, | ||
| 318 | int offset, size_t size, int more) | ||
| 319 | { | ||
| 320 | int flags = MSG_DONTWAIT | MSG_NOSIGNAL | (more ? MSG_MORE : MSG_EOR); | ||
| 321 | int ret; | ||
| 322 | |||
| 323 | ret = kernel_sendpage(sock, page, offset, size, flags); | ||
| 324 | if (ret == -EAGAIN) | ||
| 325 | ret = 0; | ||
| 326 | |||
| 327 | return ret; | ||
| 328 | } | ||
| 329 | |||
| 287 | 330 | ||
| 288 | /* | 331 | /* |
| 289 | * Shutdown/close the socket for the given connection. | 332 | * Shutdown/close the socket for the given connection. |
| @@ -391,22 +434,23 @@ bool ceph_con_opened(struct ceph_connection *con) | |||
| 391 | */ | 434 | */ |
| 392 | struct ceph_connection *ceph_con_get(struct ceph_connection *con) | 435 | struct ceph_connection *ceph_con_get(struct ceph_connection *con) |
| 393 | { | 436 | { |
| 394 | dout("con_get %p nref = %d -> %d\n", con, | 437 | int nref = __atomic_add_unless(&con->nref, 1, 0); |
| 395 | atomic_read(&con->nref), atomic_read(&con->nref) + 1); | 438 | |
| 396 | if (atomic_inc_not_zero(&con->nref)) | 439 | dout("con_get %p nref = %d -> %d\n", con, nref, nref + 1); |
| 397 | return con; | 440 | |
| 398 | return NULL; | 441 | return nref ? con : NULL; |
| 399 | } | 442 | } |
| 400 | 443 | ||
| 401 | void ceph_con_put(struct ceph_connection *con) | 444 | void ceph_con_put(struct ceph_connection *con) |
| 402 | { | 445 | { |
| 403 | dout("con_put %p nref = %d -> %d\n", con, | 446 | int nref = atomic_dec_return(&con->nref); |
| 404 | atomic_read(&con->nref), atomic_read(&con->nref) - 1); | 447 | |
| 405 | BUG_ON(atomic_read(&con->nref) == 0); | 448 | BUG_ON(nref < 0); |
| 406 | if (atomic_dec_and_test(&con->nref)) { | 449 | if (nref == 0) { |
| 407 | BUG_ON(con->sock); | 450 | BUG_ON(con->sock); |
| 408 | kfree(con); | 451 | kfree(con); |
| 409 | } | 452 | } |
| 453 | dout("con_put %p nref = %d -> %d\n", con, nref + 1, nref); | ||
| 410 | } | 454 | } |
| 411 | 455 | ||
| 412 | /* | 456 | /* |
| @@ -442,14 +486,35 @@ static u32 get_global_seq(struct ceph_messenger *msgr, u32 gt) | |||
| 442 | return ret; | 486 | return ret; |
| 443 | } | 487 | } |
| 444 | 488 | ||
| 489 | static void ceph_con_out_kvec_reset(struct ceph_connection *con) | ||
| 490 | { | ||
| 491 | con->out_kvec_left = 0; | ||
| 492 | con->out_kvec_bytes = 0; | ||
| 493 | con->out_kvec_cur = &con->out_kvec[0]; | ||
| 494 | } | ||
| 495 | |||
| 496 | static void ceph_con_out_kvec_add(struct ceph_connection *con, | ||
| 497 | size_t size, void *data) | ||
| 498 | { | ||
| 499 | int index; | ||
| 500 | |||
| 501 | index = con->out_kvec_left; | ||
| 502 | BUG_ON(index >= ARRAY_SIZE(con->out_kvec)); | ||
| 503 | |||
| 504 | con->out_kvec[index].iov_len = size; | ||
| 505 | con->out_kvec[index].iov_base = data; | ||
| 506 | con->out_kvec_left++; | ||
| 507 | con->out_kvec_bytes += size; | ||
| 508 | } | ||
| 445 | 509 | ||
| 446 | /* | 510 | /* |
| 447 | * Prepare footer for currently outgoing message, and finish things | 511 | * Prepare footer for currently outgoing message, and finish things |
| 448 | * off. Assumes out_kvec* are already valid.. we just add on to the end. | 512 | * off. Assumes out_kvec* are already valid.. we just add on to the end. |
| 449 | */ | 513 | */ |
| 450 | static void prepare_write_message_footer(struct ceph_connection *con, int v) | 514 | static void prepare_write_message_footer(struct ceph_connection *con) |
| 451 | { | 515 | { |
| 452 | struct ceph_msg *m = con->out_msg; | 516 | struct ceph_msg *m = con->out_msg; |
| 517 | int v = con->out_kvec_left; | ||
| 453 | 518 | ||
| 454 | dout("prepare_write_message_footer %p\n", con); | 519 | dout("prepare_write_message_footer %p\n", con); |
| 455 | con->out_kvec_is_msg = true; | 520 | con->out_kvec_is_msg = true; |
| @@ -467,9 +532,9 @@ static void prepare_write_message_footer(struct ceph_connection *con, int v) | |||
| 467 | static void prepare_write_message(struct ceph_connection *con) | 532 | static void prepare_write_message(struct ceph_connection *con) |
| 468 | { | 533 | { |
| 469 | struct ceph_msg *m; | 534 | struct ceph_msg *m; |
| 470 | int v = 0; | 535 | u32 crc; |
| 471 | 536 | ||
| 472 | con->out_kvec_bytes = 0; | 537 | ceph_con_out_kvec_reset(con); |
| 473 | con->out_kvec_is_msg = true; | 538 | con->out_kvec_is_msg = true; |
| 474 | con->out_msg_done = false; | 539 | con->out_msg_done = false; |
| 475 | 540 | ||
| @@ -477,16 +542,13 @@ static void prepare_write_message(struct ceph_connection *con) | |||
| 477 | * TCP packet that's a good thing. */ | 542 | * TCP packet that's a good thing. */ |
| 478 | if (con->in_seq > con->in_seq_acked) { | 543 | if (con->in_seq > con->in_seq_acked) { |
| 479 | con->in_seq_acked = con->in_seq; | 544 | con->in_seq_acked = con->in_seq; |
| 480 | con->out_kvec[v].iov_base = &tag_ack; | 545 | ceph_con_out_kvec_add(con, sizeof (tag_ack), &tag_ack); |
| 481 | con->out_kvec[v++].iov_len = 1; | ||
| 482 | con->out_temp_ack = cpu_to_le64(con->in_seq_acked); | 546 | con->out_temp_ack = cpu_to_le64(con->in_seq_acked); |
| 483 | con->out_kvec[v].iov_base = &con->out_temp_ack; | 547 | ceph_con_out_kvec_add(con, sizeof (con->out_temp_ack), |
| 484 | con->out_kvec[v++].iov_len = sizeof(con->out_temp_ack); | 548 | &con->out_temp_ack); |
| 485 | con->out_kvec_bytes = 1 + sizeof(con->out_temp_ack); | ||
| 486 | } | 549 | } |
| 487 | 550 | ||
| 488 | m = list_first_entry(&con->out_queue, | 551 | m = list_first_entry(&con->out_queue, struct ceph_msg, list_head); |
| 489 | struct ceph_msg, list_head); | ||
| 490 | con->out_msg = m; | 552 | con->out_msg = m; |
| 491 | 553 | ||
| 492 | /* put message on sent list */ | 554 | /* put message on sent list */ |
| @@ -510,30 +572,26 @@ static void prepare_write_message(struct ceph_connection *con) | |||
| 510 | BUG_ON(le32_to_cpu(m->hdr.front_len) != m->front.iov_len); | 572 | BUG_ON(le32_to_cpu(m->hdr.front_len) != m->front.iov_len); |
| 511 | 573 | ||
| 512 | /* tag + hdr + front + middle */ | 574 | /* tag + hdr + front + middle */ |
| 513 | con->out_kvec[v].iov_base = &tag_msg; | 575 | ceph_con_out_kvec_add(con, sizeof (tag_msg), &tag_msg); |
| 514 | con->out_kvec[v++].iov_len = 1; | 576 | ceph_con_out_kvec_add(con, sizeof (m->hdr), &m->hdr); |
| 515 | con->out_kvec[v].iov_base = &m->hdr; | 577 | ceph_con_out_kvec_add(con, m->front.iov_len, m->front.iov_base); |
| 516 | con->out_kvec[v++].iov_len = sizeof(m->hdr); | 578 | |
| 517 | con->out_kvec[v++] = m->front; | ||
| 518 | if (m->middle) | 579 | if (m->middle) |
| 519 | con->out_kvec[v++] = m->middle->vec; | 580 | ceph_con_out_kvec_add(con, m->middle->vec.iov_len, |
| 520 | con->out_kvec_left = v; | 581 | m->middle->vec.iov_base); |
| 521 | con->out_kvec_bytes += 1 + sizeof(m->hdr) + m->front.iov_len + | ||
| 522 | (m->middle ? m->middle->vec.iov_len : 0); | ||
| 523 | con->out_kvec_cur = con->out_kvec; | ||
| 524 | 582 | ||
| 525 | /* fill in crc (except data pages), footer */ | 583 | /* fill in crc (except data pages), footer */ |
| 526 | con->out_msg->hdr.crc = | 584 | crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc)); |
| 527 | cpu_to_le32(crc32c(0, (void *)&m->hdr, | 585 | con->out_msg->hdr.crc = cpu_to_le32(crc); |
| 528 | sizeof(m->hdr) - sizeof(m->hdr.crc))); | ||
| 529 | con->out_msg->footer.flags = CEPH_MSG_FOOTER_COMPLETE; | 586 | con->out_msg->footer.flags = CEPH_MSG_FOOTER_COMPLETE; |
| 530 | con->out_msg->footer.front_crc = | 587 | |
| 531 | cpu_to_le32(crc32c(0, m->front.iov_base, m->front.iov_len)); | 588 | crc = crc32c(0, m->front.iov_base, m->front.iov_len); |
| 532 | if (m->middle) | 589 | con->out_msg->footer.front_crc = cpu_to_le32(crc); |
| 533 | con->out_msg->footer.middle_crc = | 590 | if (m->middle) { |
| 534 | cpu_to_le32(crc32c(0, m->middle->vec.iov_base, | 591 | crc = crc32c(0, m->middle->vec.iov_base, |
| 535 | m->middle->vec.iov_len)); | 592 | m->middle->vec.iov_len); |
| 536 | else | 593 | con->out_msg->footer.middle_crc = cpu_to_le32(crc); |
| 594 | } else | ||
| 537 | con->out_msg->footer.middle_crc = 0; | 595 | con->out_msg->footer.middle_crc = 0; |
| 538 | con->out_msg->footer.data_crc = 0; | 596 | con->out_msg->footer.data_crc = 0; |
| 539 | dout("prepare_write_message front_crc %u data_crc %u\n", | 597 | dout("prepare_write_message front_crc %u data_crc %u\n", |
| @@ -549,11 +607,11 @@ static void prepare_write_message(struct ceph_connection *con) | |||
| 549 | else | 607 | else |
| 550 | con->out_msg_pos.page_pos = 0; | 608 | con->out_msg_pos.page_pos = 0; |
| 551 | con->out_msg_pos.data_pos = 0; | 609 | con->out_msg_pos.data_pos = 0; |
| 552 | con->out_msg_pos.did_page_crc = 0; | 610 | con->out_msg_pos.did_page_crc = false; |
| 553 | con->out_more = 1; /* data + footer will follow */ | 611 | con->out_more = 1; /* data + footer will follow */ |
| 554 | } else { | 612 | } else { |
| 555 | /* no, queue up footer too and be done */ | 613 | /* no, queue up footer too and be done */ |
| 556 | prepare_write_message_footer(con, v); | 614 | prepare_write_message_footer(con); |
| 557 | } | 615 | } |
| 558 | 616 | ||
| 559 | set_bit(WRITE_PENDING, &con->state); | 617 | set_bit(WRITE_PENDING, &con->state); |
| @@ -568,14 +626,14 @@ static void prepare_write_ack(struct ceph_connection *con) | |||
| 568 | con->in_seq_acked, con->in_seq); | 626 | con->in_seq_acked, con->in_seq); |
| 569 | con->in_seq_acked = con->in_seq; | 627 | con->in_seq_acked = con->in_seq; |
| 570 | 628 | ||
| 571 | con->out_kvec[0].iov_base = &tag_ack; | 629 | ceph_con_out_kvec_reset(con); |
| 572 | con->out_kvec[0].iov_len = 1; | 630 | |
| 631 | ceph_con_out_kvec_add(con, sizeof (tag_ack), &tag_ack); | ||
| 632 | |||
| 573 | con->out_temp_ack = cpu_to_le64(con->in_seq_acked); | 633 | con->out_temp_ack = cpu_to_le64(con->in_seq_acked); |
| 574 | con->out_kvec[1].iov_base = &con->out_temp_ack; | 634 | ceph_con_out_kvec_add(con, sizeof (con->out_temp_ack), |
| 575 | con->out_kvec[1].iov_len = sizeof(con->out_temp_ack); | 635 | &con->out_temp_ack); |
| 576 | con->out_kvec_left = 2; | 636 | |
| 577 | con->out_kvec_bytes = 1 + sizeof(con->out_temp_ack); | ||
| 578 | con->out_kvec_cur = con->out_kvec; | ||
| 579 | con->out_more = 1; /* more will follow.. eventually.. */ | 637 | con->out_more = 1; /* more will follow.. eventually.. */ |
| 580 | set_bit(WRITE_PENDING, &con->state); | 638 | set_bit(WRITE_PENDING, &con->state); |
| 581 | } | 639 | } |
| @@ -586,11 +644,8 @@ static void prepare_write_ack(struct ceph_connection *con) | |||
| 586 | static void prepare_write_keepalive(struct ceph_connection *con) | 644 | static void prepare_write_keepalive(struct ceph_connection *con) |
| 587 | { | 645 | { |
| 588 | dout("prepare_write_keepalive %p\n", con); | 646 | dout("prepare_write_keepalive %p\n", con); |
| 589 | con->out_kvec[0].iov_base = &tag_keepalive; | 647 | ceph_con_out_kvec_reset(con); |
| 590 | con->out_kvec[0].iov_len = 1; | 648 | ceph_con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive); |
| 591 | con->out_kvec_left = 1; | ||
| 592 | con->out_kvec_bytes = 1; | ||
| 593 | con->out_kvec_cur = con->out_kvec; | ||
| 594 | set_bit(WRITE_PENDING, &con->state); | 649 | set_bit(WRITE_PENDING, &con->state); |
| 595 | } | 650 | } |
| 596 | 651 | ||
| @@ -619,12 +674,9 @@ static int prepare_connect_authorizer(struct ceph_connection *con) | |||
| 619 | con->out_connect.authorizer_protocol = cpu_to_le32(auth_protocol); | 674 | con->out_connect.authorizer_protocol = cpu_to_le32(auth_protocol); |
| 620 | con->out_connect.authorizer_len = cpu_to_le32(auth_len); | 675 | con->out_connect.authorizer_len = cpu_to_le32(auth_len); |
| 621 | 676 | ||
| 622 | if (auth_len) { | 677 | if (auth_len) |
| 623 | con->out_kvec[con->out_kvec_left].iov_base = auth_buf; | 678 | ceph_con_out_kvec_add(con, auth_len, auth_buf); |
| 624 | con->out_kvec[con->out_kvec_left].iov_len = auth_len; | 679 | |
| 625 | con->out_kvec_left++; | ||
| 626 | con->out_kvec_bytes += auth_len; | ||
| 627 | } | ||
| 628 | return 0; | 680 | return 0; |
| 629 | } | 681 | } |
| 630 | 682 | ||
| @@ -634,22 +686,18 @@ static int prepare_connect_authorizer(struct ceph_connection *con) | |||
| 634 | static void prepare_write_banner(struct ceph_messenger *msgr, | 686 | static void prepare_write_banner(struct ceph_messenger *msgr, |
| 635 | struct ceph_connection *con) | 687 | struct ceph_connection *con) |
| 636 | { | 688 | { |
| 637 | int len = strlen(CEPH_BANNER); | 689 | ceph_con_out_kvec_reset(con); |
| 690 | ceph_con_out_kvec_add(con, strlen(CEPH_BANNER), CEPH_BANNER); | ||
| 691 | ceph_con_out_kvec_add(con, sizeof (msgr->my_enc_addr), | ||
| 692 | &msgr->my_enc_addr); | ||
| 638 | 693 | ||
| 639 | con->out_kvec[0].iov_base = CEPH_BANNER; | ||
| 640 | con->out_kvec[0].iov_len = len; | ||
| 641 | con->out_kvec[1].iov_base = &msgr->my_enc_addr; | ||
| 642 | con->out_kvec[1].iov_len = sizeof(msgr->my_enc_addr); | ||
| 643 | con->out_kvec_left = 2; | ||
| 644 | con->out_kvec_bytes = len + sizeof(msgr->my_enc_addr); | ||
| 645 | con->out_kvec_cur = con->out_kvec; | ||
| 646 | con->out_more = 0; | 694 | con->out_more = 0; |
| 647 | set_bit(WRITE_PENDING, &con->state); | 695 | set_bit(WRITE_PENDING, &con->state); |
| 648 | } | 696 | } |
| 649 | 697 | ||
| 650 | static int prepare_write_connect(struct ceph_messenger *msgr, | 698 | static int prepare_write_connect(struct ceph_messenger *msgr, |
| 651 | struct ceph_connection *con, | 699 | struct ceph_connection *con, |
| 652 | int after_banner) | 700 | int include_banner) |
| 653 | { | 701 | { |
| 654 | unsigned global_seq = get_global_seq(con->msgr, 0); | 702 | unsigned global_seq = get_global_seq(con->msgr, 0); |
| 655 | int proto; | 703 | int proto; |
| @@ -678,22 +726,18 @@ static int prepare_write_connect(struct ceph_messenger *msgr, | |||
| 678 | con->out_connect.protocol_version = cpu_to_le32(proto); | 726 | con->out_connect.protocol_version = cpu_to_le32(proto); |
| 679 | con->out_connect.flags = 0; | 727 | con->out_connect.flags = 0; |
| 680 | 728 | ||
| 681 | if (!after_banner) { | 729 | if (include_banner) |
| 682 | con->out_kvec_left = 0; | 730 | prepare_write_banner(msgr, con); |
| 683 | con->out_kvec_bytes = 0; | 731 | else |
| 684 | } | 732 | ceph_con_out_kvec_reset(con); |
| 685 | con->out_kvec[con->out_kvec_left].iov_base = &con->out_connect; | 733 | ceph_con_out_kvec_add(con, sizeof (con->out_connect), &con->out_connect); |
| 686 | con->out_kvec[con->out_kvec_left].iov_len = sizeof(con->out_connect); | 734 | |
| 687 | con->out_kvec_left++; | ||
| 688 | con->out_kvec_bytes += sizeof(con->out_connect); | ||
| 689 | con->out_kvec_cur = con->out_kvec; | ||
| 690 | con->out_more = 0; | 735 | con->out_more = 0; |
| 691 | set_bit(WRITE_PENDING, &con->state); | 736 | set_bit(WRITE_PENDING, &con->state); |
| 692 | 737 | ||
| 693 | return prepare_connect_authorizer(con); | 738 | return prepare_connect_authorizer(con); |
| 694 | } | 739 | } |
| 695 | 740 | ||
| 696 | |||
| 697 | /* | 741 | /* |
| 698 | * write as much of pending kvecs to the socket as we can. | 742 | * write as much of pending kvecs to the socket as we can. |
| 699 | * 1 -> done | 743 | * 1 -> done |
| @@ -714,17 +758,18 @@ static int write_partial_kvec(struct ceph_connection *con) | |||
| 714 | con->out_kvec_bytes -= ret; | 758 | con->out_kvec_bytes -= ret; |
| 715 | if (con->out_kvec_bytes == 0) | 759 | if (con->out_kvec_bytes == 0) |
| 716 | break; /* done */ | 760 | break; /* done */ |
| 717 | while (ret > 0) { | 761 | |
| 718 | if (ret >= con->out_kvec_cur->iov_len) { | 762 | /* account for full iov entries consumed */ |
| 719 | ret -= con->out_kvec_cur->iov_len; | 763 | while (ret >= con->out_kvec_cur->iov_len) { |
| 720 | con->out_kvec_cur++; | 764 | BUG_ON(!con->out_kvec_left); |
| 721 | con->out_kvec_left--; | 765 | ret -= con->out_kvec_cur->iov_len; |
| 722 | } else { | 766 | con->out_kvec_cur++; |
| 723 | con->out_kvec_cur->iov_len -= ret; | 767 | con->out_kvec_left--; |
| 724 | con->out_kvec_cur->iov_base += ret; | 768 | } |
| 725 | ret = 0; | 769 | /* and for a partially-consumed entry */ |
| 726 | break; | 770 | if (ret) { |
| 727 | } | 771 | con->out_kvec_cur->iov_len -= ret; |
| 772 | con->out_kvec_cur->iov_base += ret; | ||
| 728 | } | 773 | } |
| 729 | } | 774 | } |
| 730 | con->out_kvec_left = 0; | 775 | con->out_kvec_left = 0; |
| @@ -773,7 +818,7 @@ static int write_partial_msg_pages(struct ceph_connection *con) | |||
| 773 | struct ceph_msg *msg = con->out_msg; | 818 | struct ceph_msg *msg = con->out_msg; |
| 774 | unsigned data_len = le32_to_cpu(msg->hdr.data_len); | 819 | unsigned data_len = le32_to_cpu(msg->hdr.data_len); |
| 775 | size_t len; | 820 | size_t len; |
| 776 | int crc = con->msgr->nocrc; | 821 | bool do_datacrc = !con->msgr->nocrc; |
| 777 | int ret; | 822 | int ret; |
| 778 | int total_max_write; | 823 | int total_max_write; |
| 779 | int in_trail = 0; | 824 | int in_trail = 0; |
| @@ -790,9 +835,8 @@ static int write_partial_msg_pages(struct ceph_connection *con) | |||
| 790 | 835 | ||
| 791 | while (data_len > con->out_msg_pos.data_pos) { | 836 | while (data_len > con->out_msg_pos.data_pos) { |
| 792 | struct page *page = NULL; | 837 | struct page *page = NULL; |
| 793 | void *kaddr = NULL; | ||
| 794 | int max_write = PAGE_SIZE; | 838 | int max_write = PAGE_SIZE; |
| 795 | int page_shift = 0; | 839 | int bio_offset = 0; |
| 796 | 840 | ||
| 797 | total_max_write = data_len - trail_len - | 841 | total_max_write = data_len - trail_len - |
| 798 | con->out_msg_pos.data_pos; | 842 | con->out_msg_pos.data_pos; |
| @@ -811,58 +855,47 @@ static int write_partial_msg_pages(struct ceph_connection *con) | |||
| 811 | 855 | ||
| 812 | page = list_first_entry(&msg->trail->head, | 856 | page = list_first_entry(&msg->trail->head, |
| 813 | struct page, lru); | 857 | struct page, lru); |
| 814 | if (crc) | ||
| 815 | kaddr = kmap(page); | ||
| 816 | max_write = PAGE_SIZE; | 858 | max_write = PAGE_SIZE; |
| 817 | } else if (msg->pages) { | 859 | } else if (msg->pages) { |
| 818 | page = msg->pages[con->out_msg_pos.page]; | 860 | page = msg->pages[con->out_msg_pos.page]; |
| 819 | if (crc) | ||
| 820 | kaddr = kmap(page); | ||
| 821 | } else if (msg->pagelist) { | 861 | } else if (msg->pagelist) { |
| 822 | page = list_first_entry(&msg->pagelist->head, | 862 | page = list_first_entry(&msg->pagelist->head, |
| 823 | struct page, lru); | 863 | struct page, lru); |
| 824 | if (crc) | ||
| 825 | kaddr = kmap(page); | ||
| 826 | #ifdef CONFIG_BLOCK | 864 | #ifdef CONFIG_BLOCK |
| 827 | } else if (msg->bio) { | 865 | } else if (msg->bio) { |
| 828 | struct bio_vec *bv; | 866 | struct bio_vec *bv; |
| 829 | 867 | ||
| 830 | bv = bio_iovec_idx(msg->bio_iter, msg->bio_seg); | 868 | bv = bio_iovec_idx(msg->bio_iter, msg->bio_seg); |
| 831 | page = bv->bv_page; | 869 | page = bv->bv_page; |
| 832 | page_shift = bv->bv_offset; | 870 | bio_offset = bv->bv_offset; |
| 833 | if (crc) | ||
| 834 | kaddr = kmap(page) + page_shift; | ||
| 835 | max_write = bv->bv_len; | 871 | max_write = bv->bv_len; |
| 836 | #endif | 872 | #endif |
| 837 | } else { | 873 | } else { |
| 838 | page = con->msgr->zero_page; | 874 | page = zero_page; |
| 839 | if (crc) | ||
| 840 | kaddr = page_address(con->msgr->zero_page); | ||
| 841 | } | 875 | } |
| 842 | len = min_t(int, max_write - con->out_msg_pos.page_pos, | 876 | len = min_t(int, max_write - con->out_msg_pos.page_pos, |
| 843 | total_max_write); | 877 | total_max_write); |
| 844 | 878 | ||
| 845 | if (crc && !con->out_msg_pos.did_page_crc) { | 879 | if (do_datacrc && !con->out_msg_pos.did_page_crc) { |
| 846 | void *base = kaddr + con->out_msg_pos.page_pos; | 880 | void *base; |
| 881 | u32 crc; | ||
| 847 | u32 tmpcrc = le32_to_cpu(con->out_msg->footer.data_crc); | 882 | u32 tmpcrc = le32_to_cpu(con->out_msg->footer.data_crc); |
| 883 | char *kaddr; | ||
| 848 | 884 | ||
| 885 | kaddr = kmap(page); | ||
| 849 | BUG_ON(kaddr == NULL); | 886 | BUG_ON(kaddr == NULL); |
| 850 | con->out_msg->footer.data_crc = | 887 | base = kaddr + con->out_msg_pos.page_pos + bio_offset; |
| 851 | cpu_to_le32(crc32c(tmpcrc, base, len)); | 888 | crc = crc32c(tmpcrc, base, len); |
| 852 | con->out_msg_pos.did_page_crc = 1; | 889 | con->out_msg->footer.data_crc = cpu_to_le32(crc); |
| 890 | con->out_msg_pos.did_page_crc = true; | ||
| 853 | } | 891 | } |
| 854 | ret = kernel_sendpage(con->sock, page, | 892 | ret = ceph_tcp_sendpage(con->sock, page, |
| 855 | con->out_msg_pos.page_pos + page_shift, | 893 | con->out_msg_pos.page_pos + bio_offset, |
| 856 | len, | 894 | len, 1); |
| 857 | MSG_DONTWAIT | MSG_NOSIGNAL | | 895 | |
| 858 | MSG_MORE); | 896 | if (do_datacrc) |
| 859 | |||
| 860 | if (crc && | ||
| 861 | (msg->pages || msg->pagelist || msg->bio || in_trail)) | ||
| 862 | kunmap(page); | 897 | kunmap(page); |
| 863 | 898 | ||
| 864 | if (ret == -EAGAIN) | ||
| 865 | ret = 0; | ||
| 866 | if (ret <= 0) | 899 | if (ret <= 0) |
| 867 | goto out; | 900 | goto out; |
| 868 | 901 | ||
| @@ -871,7 +904,7 @@ static int write_partial_msg_pages(struct ceph_connection *con) | |||
| 871 | if (ret == len) { | 904 | if (ret == len) { |
| 872 | con->out_msg_pos.page_pos = 0; | 905 | con->out_msg_pos.page_pos = 0; |
| 873 | con->out_msg_pos.page++; | 906 | con->out_msg_pos.page++; |
| 874 | con->out_msg_pos.did_page_crc = 0; | 907 | con->out_msg_pos.did_page_crc = false; |
| 875 | if (in_trail) | 908 | if (in_trail) |
| 876 | list_move_tail(&page->lru, | 909 | list_move_tail(&page->lru, |
| 877 | &msg->trail->head); | 910 | &msg->trail->head); |
| @@ -888,12 +921,10 @@ static int write_partial_msg_pages(struct ceph_connection *con) | |||
| 888 | dout("write_partial_msg_pages %p msg %p done\n", con, msg); | 921 | dout("write_partial_msg_pages %p msg %p done\n", con, msg); |
| 889 | 922 | ||
| 890 | /* prepare and queue up footer, too */ | 923 | /* prepare and queue up footer, too */ |
| 891 | if (!crc) | 924 | if (!do_datacrc) |
| 892 | con->out_msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC; | 925 | con->out_msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC; |
| 893 | con->out_kvec_bytes = 0; | 926 | ceph_con_out_kvec_reset(con); |
| 894 | con->out_kvec_left = 0; | 927 | prepare_write_message_footer(con); |
| 895 | con->out_kvec_cur = con->out_kvec; | ||
| 896 | prepare_write_message_footer(con, 0); | ||
| 897 | ret = 1; | 928 | ret = 1; |
| 898 | out: | 929 | out: |
| 899 | return ret; | 930 | return ret; |
| @@ -907,12 +938,9 @@ static int write_partial_skip(struct ceph_connection *con) | |||
| 907 | int ret; | 938 | int ret; |
| 908 | 939 | ||
| 909 | while (con->out_skip > 0) { | 940 | while (con->out_skip > 0) { |
| 910 | struct kvec iov = { | 941 | size_t size = min(con->out_skip, (int) PAGE_CACHE_SIZE); |
| 911 | .iov_base = page_address(con->msgr->zero_page), | ||
| 912 | .iov_len = min(con->out_skip, (int)PAGE_CACHE_SIZE) | ||
| 913 | }; | ||
| 914 | 942 | ||
| 915 | ret = ceph_tcp_sendmsg(con->sock, &iov, 1, iov.iov_len, 1); | 943 | ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, 1); |
| 916 | if (ret <= 0) | 944 | if (ret <= 0) |
| 917 | goto out; | 945 | goto out; |
| 918 | con->out_skip -= ret; | 946 | con->out_skip -= ret; |
| @@ -1085,8 +1113,8 @@ static void addr_set_port(struct sockaddr_storage *ss, int p) | |||
| 1085 | static int ceph_pton(const char *str, size_t len, struct sockaddr_storage *ss, | 1113 | static int ceph_pton(const char *str, size_t len, struct sockaddr_storage *ss, |
| 1086 | char delim, const char **ipend) | 1114 | char delim, const char **ipend) |
| 1087 | { | 1115 | { |
| 1088 | struct sockaddr_in *in4 = (void *)ss; | 1116 | struct sockaddr_in *in4 = (struct sockaddr_in *) ss; |
| 1089 | struct sockaddr_in6 *in6 = (void *)ss; | 1117 | struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) ss; |
| 1090 | 1118 | ||
| 1091 | memset(ss, 0, sizeof(*ss)); | 1119 | memset(ss, 0, sizeof(*ss)); |
| 1092 | 1120 | ||
| @@ -1512,10 +1540,9 @@ static int read_partial_message_section(struct ceph_connection *con, | |||
| 1512 | if (ret <= 0) | 1540 | if (ret <= 0) |
| 1513 | return ret; | 1541 | return ret; |
| 1514 | section->iov_len += ret; | 1542 | section->iov_len += ret; |
| 1515 | if (section->iov_len == sec_len) | ||
| 1516 | *crc = crc32c(0, section->iov_base, | ||
| 1517 | section->iov_len); | ||
| 1518 | } | 1543 | } |
| 1544 | if (section->iov_len == sec_len) | ||
| 1545 | *crc = crc32c(0, section->iov_base, section->iov_len); | ||
| 1519 | 1546 | ||
| 1520 | return 1; | 1547 | return 1; |
| 1521 | } | 1548 | } |
| @@ -1527,7 +1554,7 @@ static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con, | |||
| 1527 | 1554 | ||
| 1528 | static int read_partial_message_pages(struct ceph_connection *con, | 1555 | static int read_partial_message_pages(struct ceph_connection *con, |
| 1529 | struct page **pages, | 1556 | struct page **pages, |
| 1530 | unsigned data_len, int datacrc) | 1557 | unsigned data_len, bool do_datacrc) |
| 1531 | { | 1558 | { |
| 1532 | void *p; | 1559 | void *p; |
| 1533 | int ret; | 1560 | int ret; |
| @@ -1540,7 +1567,7 @@ static int read_partial_message_pages(struct ceph_connection *con, | |||
| 1540 | p = kmap(pages[con->in_msg_pos.page]); | 1567 | p = kmap(pages[con->in_msg_pos.page]); |
| 1541 | ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos, | 1568 | ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos, |
| 1542 | left); | 1569 | left); |
| 1543 | if (ret > 0 && datacrc) | 1570 | if (ret > 0 && do_datacrc) |
| 1544 | con->in_data_crc = | 1571 | con->in_data_crc = |
| 1545 | crc32c(con->in_data_crc, | 1572 | crc32c(con->in_data_crc, |
| 1546 | p + con->in_msg_pos.page_pos, ret); | 1573 | p + con->in_msg_pos.page_pos, ret); |
| @@ -1560,7 +1587,7 @@ static int read_partial_message_pages(struct ceph_connection *con, | |||
| 1560 | #ifdef CONFIG_BLOCK | 1587 | #ifdef CONFIG_BLOCK |
| 1561 | static int read_partial_message_bio(struct ceph_connection *con, | 1588 | static int read_partial_message_bio(struct ceph_connection *con, |
| 1562 | struct bio **bio_iter, int *bio_seg, | 1589 | struct bio **bio_iter, int *bio_seg, |
| 1563 | unsigned data_len, int datacrc) | 1590 | unsigned data_len, bool do_datacrc) |
| 1564 | { | 1591 | { |
| 1565 | struct bio_vec *bv = bio_iovec_idx(*bio_iter, *bio_seg); | 1592 | struct bio_vec *bv = bio_iovec_idx(*bio_iter, *bio_seg); |
| 1566 | void *p; | 1593 | void *p; |
| @@ -1576,7 +1603,7 @@ static int read_partial_message_bio(struct ceph_connection *con, | |||
| 1576 | 1603 | ||
| 1577 | ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos, | 1604 | ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos, |
| 1578 | left); | 1605 | left); |
| 1579 | if (ret > 0 && datacrc) | 1606 | if (ret > 0 && do_datacrc) |
| 1580 | con->in_data_crc = | 1607 | con->in_data_crc = |
| 1581 | crc32c(con->in_data_crc, | 1608 | crc32c(con->in_data_crc, |
| 1582 | p + con->in_msg_pos.page_pos, ret); | 1609 | p + con->in_msg_pos.page_pos, ret); |
| @@ -1603,9 +1630,10 @@ static int read_partial_message(struct ceph_connection *con) | |||
| 1603 | int ret; | 1630 | int ret; |
| 1604 | int to, left; | 1631 | int to, left; |
| 1605 | unsigned front_len, middle_len, data_len; | 1632 | unsigned front_len, middle_len, data_len; |
| 1606 | int datacrc = con->msgr->nocrc; | 1633 | bool do_datacrc = !con->msgr->nocrc; |
| 1607 | int skip; | 1634 | int skip; |
| 1608 | u64 seq; | 1635 | u64 seq; |
| 1636 | u32 crc; | ||
| 1609 | 1637 | ||
| 1610 | dout("read_partial_message con %p msg %p\n", con, m); | 1638 | dout("read_partial_message con %p msg %p\n", con, m); |
| 1611 | 1639 | ||
| @@ -1618,17 +1646,16 @@ static int read_partial_message(struct ceph_connection *con) | |||
| 1618 | if (ret <= 0) | 1646 | if (ret <= 0) |
| 1619 | return ret; | 1647 | return ret; |
| 1620 | con->in_base_pos += ret; | 1648 | con->in_base_pos += ret; |
| 1621 | if (con->in_base_pos == sizeof(con->in_hdr)) { | ||
| 1622 | u32 crc = crc32c(0, (void *)&con->in_hdr, | ||
| 1623 | sizeof(con->in_hdr) - sizeof(con->in_hdr.crc)); | ||
| 1624 | if (crc != le32_to_cpu(con->in_hdr.crc)) { | ||
| 1625 | pr_err("read_partial_message bad hdr " | ||
| 1626 | " crc %u != expected %u\n", | ||
| 1627 | crc, con->in_hdr.crc); | ||
| 1628 | return -EBADMSG; | ||
| 1629 | } | ||
| 1630 | } | ||
| 1631 | } | 1649 | } |
| 1650 | |||
| 1651 | crc = crc32c(0, &con->in_hdr, offsetof(struct ceph_msg_header, crc)); | ||
| 1652 | if (cpu_to_le32(crc) != con->in_hdr.crc) { | ||
| 1653 | pr_err("read_partial_message bad hdr " | ||
| 1654 | " crc %u != expected %u\n", | ||
| 1655 | crc, con->in_hdr.crc); | ||
| 1656 | return -EBADMSG; | ||
| 1657 | } | ||
| 1658 | |||
| 1632 | front_len = le32_to_cpu(con->in_hdr.front_len); | 1659 | front_len = le32_to_cpu(con->in_hdr.front_len); |
| 1633 | if (front_len > CEPH_MSG_MAX_FRONT_LEN) | 1660 | if (front_len > CEPH_MSG_MAX_FRONT_LEN) |
| 1634 | return -EIO; | 1661 | return -EIO; |
| @@ -1714,7 +1741,7 @@ static int read_partial_message(struct ceph_connection *con) | |||
| 1714 | while (con->in_msg_pos.data_pos < data_len) { | 1741 | while (con->in_msg_pos.data_pos < data_len) { |
| 1715 | if (m->pages) { | 1742 | if (m->pages) { |
| 1716 | ret = read_partial_message_pages(con, m->pages, | 1743 | ret = read_partial_message_pages(con, m->pages, |
| 1717 | data_len, datacrc); | 1744 | data_len, do_datacrc); |
| 1718 | if (ret <= 0) | 1745 | if (ret <= 0) |
| 1719 | return ret; | 1746 | return ret; |
| 1720 | #ifdef CONFIG_BLOCK | 1747 | #ifdef CONFIG_BLOCK |
| @@ -1722,7 +1749,7 @@ static int read_partial_message(struct ceph_connection *con) | |||
| 1722 | 1749 | ||
| 1723 | ret = read_partial_message_bio(con, | 1750 | ret = read_partial_message_bio(con, |
| 1724 | &m->bio_iter, &m->bio_seg, | 1751 | &m->bio_iter, &m->bio_seg, |
| 1725 | data_len, datacrc); | 1752 | data_len, do_datacrc); |
| 1726 | if (ret <= 0) | 1753 | if (ret <= 0) |
| 1727 | return ret; | 1754 | return ret; |
| 1728 | #endif | 1755 | #endif |
| @@ -1757,7 +1784,7 @@ static int read_partial_message(struct ceph_connection *con) | |||
| 1757 | m, con->in_middle_crc, m->footer.middle_crc); | 1784 | m, con->in_middle_crc, m->footer.middle_crc); |
| 1758 | return -EBADMSG; | 1785 | return -EBADMSG; |
| 1759 | } | 1786 | } |
| 1760 | if (datacrc && | 1787 | if (do_datacrc && |
| 1761 | (m->footer.flags & CEPH_MSG_FOOTER_NOCRC) == 0 && | 1788 | (m->footer.flags & CEPH_MSG_FOOTER_NOCRC) == 0 && |
| 1762 | con->in_data_crc != le32_to_cpu(m->footer.data_crc)) { | 1789 | con->in_data_crc != le32_to_cpu(m->footer.data_crc)) { |
| 1763 | pr_err("read_partial_message %p data crc %u != exp. %u\n", m, | 1790 | pr_err("read_partial_message %p data crc %u != exp. %u\n", m, |
| @@ -1819,7 +1846,6 @@ more: | |||
| 1819 | 1846 | ||
| 1820 | /* open the socket first? */ | 1847 | /* open the socket first? */ |
| 1821 | if (con->sock == NULL) { | 1848 | if (con->sock == NULL) { |
| 1822 | prepare_write_banner(msgr, con); | ||
| 1823 | prepare_write_connect(msgr, con, 1); | 1849 | prepare_write_connect(msgr, con, 1); |
| 1824 | prepare_read_banner(con); | 1850 | prepare_read_banner(con); |
| 1825 | set_bit(CONNECTING, &con->state); | 1851 | set_bit(CONNECTING, &con->state); |
| @@ -1829,11 +1855,9 @@ more: | |||
| 1829 | con->in_tag = CEPH_MSGR_TAG_READY; | 1855 | con->in_tag = CEPH_MSGR_TAG_READY; |
| 1830 | dout("try_write initiating connect on %p new state %lu\n", | 1856 | dout("try_write initiating connect on %p new state %lu\n", |
| 1831 | con, con->state); | 1857 | con, con->state); |
| 1832 | con->sock = ceph_tcp_connect(con); | 1858 | ret = ceph_tcp_connect(con); |
| 1833 | if (IS_ERR(con->sock)) { | 1859 | if (ret < 0) { |
| 1834 | con->sock = NULL; | ||
| 1835 | con->error_msg = "connect error"; | 1860 | con->error_msg = "connect error"; |
| 1836 | ret = -1; | ||
| 1837 | goto out; | 1861 | goto out; |
| 1838 | } | 1862 | } |
| 1839 | } | 1863 | } |
| @@ -1953,8 +1977,9 @@ more: | |||
| 1953 | * | 1977 | * |
| 1954 | * FIXME: there must be a better way to do this! | 1978 | * FIXME: there must be a better way to do this! |
| 1955 | */ | 1979 | */ |
| 1956 | static char buf[1024]; | 1980 | static char buf[SKIP_BUF_SIZE]; |
| 1957 | int skip = min(1024, -con->in_base_pos); | 1981 | int skip = min((int) sizeof (buf), -con->in_base_pos); |
| 1982 | |||
| 1958 | dout("skipping %d / %d bytes\n", skip, -con->in_base_pos); | 1983 | dout("skipping %d / %d bytes\n", skip, -con->in_base_pos); |
| 1959 | ret = ceph_tcp_recvmsg(con->sock, buf, skip); | 1984 | ret = ceph_tcp_recvmsg(con->sock, buf, skip); |
| 1960 | if (ret <= 0) | 1985 | if (ret <= 0) |
| @@ -2216,15 +2241,6 @@ struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr, | |||
| 2216 | 2241 | ||
| 2217 | spin_lock_init(&msgr->global_seq_lock); | 2242 | spin_lock_init(&msgr->global_seq_lock); |
| 2218 | 2243 | ||
| 2219 | /* the zero page is needed if a request is "canceled" while the message | ||
| 2220 | * is being written over the socket */ | ||
| 2221 | msgr->zero_page = __page_cache_alloc(GFP_KERNEL | __GFP_ZERO); | ||
| 2222 | if (!msgr->zero_page) { | ||
| 2223 | kfree(msgr); | ||
| 2224 | return ERR_PTR(-ENOMEM); | ||
| 2225 | } | ||
| 2226 | kmap(msgr->zero_page); | ||
| 2227 | |||
| 2228 | if (myaddr) | 2244 | if (myaddr) |
| 2229 | msgr->inst.addr = *myaddr; | 2245 | msgr->inst.addr = *myaddr; |
| 2230 | 2246 | ||
| @@ -2241,8 +2257,6 @@ EXPORT_SYMBOL(ceph_messenger_create); | |||
| 2241 | void ceph_messenger_destroy(struct ceph_messenger *msgr) | 2257 | void ceph_messenger_destroy(struct ceph_messenger *msgr) |
| 2242 | { | 2258 | { |
| 2243 | dout("destroy %p\n", msgr); | 2259 | dout("destroy %p\n", msgr); |
| 2244 | kunmap(msgr->zero_page); | ||
| 2245 | __free_page(msgr->zero_page); | ||
| 2246 | kfree(msgr); | 2260 | kfree(msgr); |
| 2247 | dout("destroyed messenger %p\n", msgr); | 2261 | dout("destroyed messenger %p\n", msgr); |
| 2248 | } | 2262 | } |
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index fd863fe76934..29ad46ec9dcf 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c | |||
| @@ -283,7 +283,8 @@ static struct crush_map *crush_decode(void *pbyval, void *end) | |||
| 283 | ceph_decode_32_safe(p, end, yes, bad); | 283 | ceph_decode_32_safe(p, end, yes, bad); |
| 284 | #if BITS_PER_LONG == 32 | 284 | #if BITS_PER_LONG == 32 |
| 285 | err = -EINVAL; | 285 | err = -EINVAL; |
| 286 | if (yes > ULONG_MAX / sizeof(struct crush_rule_step)) | 286 | if (yes > (ULONG_MAX - sizeof(*r)) |
| 287 | / sizeof(struct crush_rule_step)) | ||
| 287 | goto bad; | 288 | goto bad; |
| 288 | #endif | 289 | #endif |
| 289 | r = c->rules[i] = kmalloc(sizeof(*r) + | 290 | r = c->rules[i] = kmalloc(sizeof(*r) + |
