diff options
Diffstat (limited to 'net/dccp/proto.c')
-rw-r--r-- | net/dccp/proto.c | 826 |
1 file changed, 826 insertions(+), 0 deletions(-)
diff --git a/net/dccp/proto.c b/net/dccp/proto.c new file mode 100644 index 000000000000..18a0e69c9dc7 --- /dev/null +++ b/net/dccp/proto.c | |||
@@ -0,0 +1,826 @@ | |||
1 | /* | ||
2 | * net/dccp/proto.c | ||
3 | * | ||
4 | * An implementation of the DCCP protocol | ||
5 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify it | ||
8 | * under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | */ | ||
11 | |||
12 | #include <linux/config.h> | ||
13 | #include <linux/dccp.h> | ||
14 | #include <linux/module.h> | ||
15 | #include <linux/types.h> | ||
16 | #include <linux/sched.h> | ||
17 | #include <linux/kernel.h> | ||
18 | #include <linux/skbuff.h> | ||
19 | #include <linux/netdevice.h> | ||
20 | #include <linux/in.h> | ||
21 | #include <linux/if_arp.h> | ||
22 | #include <linux/init.h> | ||
23 | #include <linux/random.h> | ||
24 | #include <net/checksum.h> | ||
25 | |||
26 | #include <net/inet_common.h> | ||
27 | #include <net/ip.h> | ||
28 | #include <net/protocol.h> | ||
29 | #include <net/sock.h> | ||
30 | #include <net/xfrm.h> | ||
31 | |||
32 | #include <asm/semaphore.h> | ||
33 | #include <linux/spinlock.h> | ||
34 | #include <linux/timer.h> | ||
35 | #include <linux/delay.h> | ||
36 | #include <linux/poll.h> | ||
37 | #include <linux/dccp.h> | ||
38 | |||
39 | #include "ccid.h" | ||
40 | #include "dccp.h" | ||
41 | |||
/* Per-CPU SNMP-style MIB counters for DCCP (exported via /proc). */
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

/* Number of DCCP sockets close()d by userspace but not yet destroyed. */
atomic_t dccp_orphan_count = ATOMIC_INIT(0);

/* IPv4 stack hooks: receive path and ICMP error handler for IPPROTO_DCCP. */
static struct net_protocol dccp_protocol = {
	.handler = dccp_v4_rcv,
	.err_handler = dccp_v4_err,
};
50 | |||
51 | const char *dccp_packet_name(const int type) | ||
52 | { | ||
53 | static const char *dccp_packet_names[] = { | ||
54 | [DCCP_PKT_REQUEST] = "REQUEST", | ||
55 | [DCCP_PKT_RESPONSE] = "RESPONSE", | ||
56 | [DCCP_PKT_DATA] = "DATA", | ||
57 | [DCCP_PKT_ACK] = "ACK", | ||
58 | [DCCP_PKT_DATAACK] = "DATAACK", | ||
59 | [DCCP_PKT_CLOSEREQ] = "CLOSEREQ", | ||
60 | [DCCP_PKT_CLOSE] = "CLOSE", | ||
61 | [DCCP_PKT_RESET] = "RESET", | ||
62 | [DCCP_PKT_SYNC] = "SYNC", | ||
63 | [DCCP_PKT_SYNCACK] = "SYNCACK", | ||
64 | }; | ||
65 | |||
66 | if (type >= DCCP_NR_PKT_TYPES) | ||
67 | return "INVALID"; | ||
68 | else | ||
69 | return dccp_packet_names[type]; | ||
70 | } | ||
71 | |||
72 | EXPORT_SYMBOL_GPL(dccp_packet_name); | ||
73 | |||
74 | const char *dccp_state_name(const int state) | ||
75 | { | ||
76 | static char *dccp_state_names[] = { | ||
77 | [DCCP_OPEN] = "OPEN", | ||
78 | [DCCP_REQUESTING] = "REQUESTING", | ||
79 | [DCCP_PARTOPEN] = "PARTOPEN", | ||
80 | [DCCP_LISTEN] = "LISTEN", | ||
81 | [DCCP_RESPOND] = "RESPOND", | ||
82 | [DCCP_CLOSING] = "CLOSING", | ||
83 | [DCCP_TIME_WAIT] = "TIME_WAIT", | ||
84 | [DCCP_CLOSED] = "CLOSED", | ||
85 | }; | ||
86 | |||
87 | if (state >= DCCP_MAX_STATES) | ||
88 | return "INVALID STATE!"; | ||
89 | else | ||
90 | return dccp_state_names[state]; | ||
91 | } | ||
92 | |||
93 | EXPORT_SYMBOL_GPL(dccp_state_name); | ||
94 | |||
/*
 * Move @sk into the listening state: record the DCCP role and hand off
 * to the generic inet connection-sock accept-queue setup.  The request
 * queue hash size is borrowed from TCP (TCP_SYNQ_HSIZE).
 */
static inline int dccp_listen_start(struct sock *sk)
{
	dccp_sk(sk)->dccps_role = DCCP_ROLE_LISTEN;
	return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
}
100 | |||
/*
 * dccp_disconnect  -  Abort the connection, returning the socket to CLOSED
 * @sk:    socket being disconnected
 * @flags: unused here; kept for the sk->sk_prot->disconnect signature
 *
 * Roughly the ABORT function of RFC 793 adapted to DCCP: stops timers,
 * purges queues and resets addressing state so the socket can be reused.
 * Always returns 0.
 */
int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	int err = 0;
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/* ABORT function of RFC 793 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
		/* FIXME: do the active reset thing */
	} else if (old_state == DCCP_REQUESTING)
		sk->sk_err = ECONNRESET;

	dccp_clear_xmit_timers(sk);
	__skb_queue_purge(&sk->sk_receive_queue);
	/* Drop any packet still queued for (re)transmission. */
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->dport = 0;

	/* Forget the source address unless it was explicitly bound. */
	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	/* A local port still held implies a valid bind-hash bucket. */
	BUG_TRAP(!inet->num || icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}
142 | |||
/*
 * Wait for a DCCP event.
 *
 * Note that we don't need to lock the socket, as the upper poll layers
 * take care of normal races (between the test and the event) and we don't
 * go look at any of the socket buffers directly.
 */
static unsigned int dccp_poll(struct file *file, struct socket *sock,
			      poll_table *wait)
{
	unsigned int mask;
	struct sock *sk = sock->sk;

	poll_wait(file, sk->sk_sleep, wait);
	/* A listening socket only ever becomes readable (new connection). */
	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	   by poll logic and correct handling of state changes
	   made by another threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = POLLERR;

	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLIN | POLLRDNORM;

	/* Connected? */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= POLLIN | POLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
				mask |= POLLOUT | POLLWRNORM;
			} else {  /* send SIGIO later */
				set_bit(SOCK_ASYNC_NOSPACE,
					&sk->sk_socket->flags);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
					mask |= POLLOUT | POLLWRNORM;
			}
		}
	}
	return mask;
}
198 | |||
/* No DCCP-specific ioctls are implemented yet; always -ENOIOCTLCMD. */
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	dccp_pr_debug("entry\n");
	return -ENOIOCTLCMD;
}
204 | |||
205 | int dccp_setsockopt(struct sock *sk, int level, int optname, | ||
206 | char __user *optval, int optlen) | ||
207 | { | ||
208 | struct dccp_sock *dp; | ||
209 | int err; | ||
210 | int val; | ||
211 | |||
212 | if (level != SOL_DCCP) | ||
213 | return ip_setsockopt(sk, level, optname, optval, optlen); | ||
214 | |||
215 | if (optlen < sizeof(int)) | ||
216 | return -EINVAL; | ||
217 | |||
218 | if (get_user(val, (int __user *)optval)) | ||
219 | return -EFAULT; | ||
220 | |||
221 | lock_sock(sk); | ||
222 | |||
223 | dp = dccp_sk(sk); | ||
224 | err = 0; | ||
225 | |||
226 | switch (optname) { | ||
227 | case DCCP_SOCKOPT_PACKET_SIZE: | ||
228 | dp->dccps_packet_size = val; | ||
229 | break; | ||
230 | default: | ||
231 | err = -ENOPROTOOPT; | ||
232 | break; | ||
233 | } | ||
234 | |||
235 | release_sock(sk); | ||
236 | return err; | ||
237 | } | ||
238 | |||
239 | int dccp_getsockopt(struct sock *sk, int level, int optname, | ||
240 | char __user *optval, int __user *optlen) | ||
241 | { | ||
242 | struct dccp_sock *dp; | ||
243 | int val, len; | ||
244 | |||
245 | if (level != SOL_DCCP) | ||
246 | return ip_getsockopt(sk, level, optname, optval, optlen); | ||
247 | |||
248 | if (get_user(len, optlen)) | ||
249 | return -EFAULT; | ||
250 | |||
251 | len = min_t(unsigned int, len, sizeof(int)); | ||
252 | if (len < 0) | ||
253 | return -EINVAL; | ||
254 | |||
255 | dp = dccp_sk(sk); | ||
256 | |||
257 | switch (optname) { | ||
258 | case DCCP_SOCKOPT_PACKET_SIZE: | ||
259 | val = dp->dccps_packet_size; | ||
260 | break; | ||
261 | default: | ||
262 | return -ENOPROTOOPT; | ||
263 | } | ||
264 | |||
265 | if (put_user(len, optlen) || copy_to_user(optval, &val, len)) | ||
266 | return -EFAULT; | ||
267 | |||
268 | return 0; | ||
269 | } | ||
270 | |||
/*
 * dccp_sendmsg  -  Queue and transmit one DCCP data packet
 *
 * Unlike TCP there is no byte-stream coalescing: each sendmsg() call maps
 * to exactly one packet, so the payload must fit within one MSS.
 */
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	/* One call == one packet: refuse anything that will not fit. */
	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);
	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process works.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	/* Drop the socket lock while (possibly) sleeping in the allocator. */
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (rc != 0)
		goto out_discard;

	rc = dccp_write_xmit(sk, skb, &timeo);
	/*
	 * XXX we don't use sk_write_queue, so just discard the packet.
	 * Current plan however is to _use_ sk_write_queue with
	 * an algorithm similar to tcp_sendmsg, where the main difference
	 * is that in DCCP we have to respect packet boundaries, so
	 * no coalescing of skbs.
	 *
	 * This bug was _quickly_ found & fixed by just looking at an OSTRA
	 * generated callgraph 8) -acme
	 */
	if (rc != 0)
		goto out_discard;
out_release:
	release_sock(sk);
	return rc ? : len;
out_discard:
	kfree_skb(skb);
	goto out_release;
}
328 | |||
/*
 * dccp_recvmsg  -  Receive one DCCP data packet
 *
 * Datagram semantics: at most one packet's payload is returned per call;
 * a packet larger than @len is truncated and MSG_TRUNC is set.  Non-data
 * packets found on the receive queue are consumed silently.
 */
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len, int nonblock, int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		if (dh->dccph_type == DCCP_PKT_DATA ||
		    dh->dccph_type == DCCP_PKT_DATAACK)
			goto found_ok_skb;

		/* RESET/CLOSE terminate the connection: report EOF. */
		if (dh->dccph_type == DCCP_PKT_RESET ||
		    dh->dccph_type == DCCP_PKT_CLOSE) {
			dccp_pr_debug("found fin ok!\n");
			len = 0;
			goto found_fin_ok;
		}
		/* Any other packet type is dropped from the queue. */
		dccp_pr_debug("packet_type=%s\n",
			      dccp_packet_name(dh->dccph_type));
		sk_eat_skb(sk, skb);
verify_sock_status:
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when user tries to read
				 * from never connected socket.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		/* Sleep until data arrives or the timeout expires. */
		sk_wait_data(sk, &timeo);
		continue;
	found_ok_skb:
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
	found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}
425 | |||
/*
 * listen() entry point for DCCP sockets, modelled on inet_listen().
 * Moves a CLOSED socket into the LISTEN state; on an already-listening
 * socket only the backlog is adjusted.
 */
static int inet_dccp_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	unsigned char old_state;
	int err;

	lock_sock(sk);

	err = -EINVAL;
	if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
		goto out;

	old_state = sk->sk_state;
	if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
		goto out;

	/* Really, if the socket is already in listen state
	 * we can only allow the backlog to be adjusted.
	 */
	if (old_state != DCCP_LISTEN) {
		/*
		 * FIXME: here it probably should be sk->sk_prot->listen_start
		 * see tcp_listen_start
		 */
		err = dccp_listen_start(sk);
		if (err)
			goto out;
	}
	sk->sk_max_ack_backlog = backlog;
	err = 0;

out:
	release_sock(sk);
	return err;
}
461 | |||
/*
 * State transition table for an active close, indexed by the current
 * socket state.  A DCCP_ACTION_FIN bit in an entry tells
 * dccp_close_state() that a CLOSE/CLOSEREQ packet must be sent as part
 * of the transition.
 */
static const unsigned char dccp_new_state[] = {
	/* current state:        new state:      action:	*/
	[0]		  = DCCP_CLOSED,
	[DCCP_OPEN]	  = DCCP_CLOSING | DCCP_ACTION_FIN,
	[DCCP_REQUESTING] = DCCP_CLOSED,
	[DCCP_PARTOPEN]	  = DCCP_CLOSING | DCCP_ACTION_FIN,
	[DCCP_LISTEN]	  = DCCP_CLOSED,
	[DCCP_RESPOND]	  = DCCP_CLOSED,
	[DCCP_CLOSING]	  = DCCP_CLOSED,
	[DCCP_TIME_WAIT]  = DCCP_CLOSED,
	[DCCP_CLOSED]	  = DCCP_CLOSED,
};
474 | |||
475 | static int dccp_close_state(struct sock *sk) | ||
476 | { | ||
477 | const int next = dccp_new_state[sk->sk_state]; | ||
478 | const int ns = next & DCCP_STATE_MASK; | ||
479 | |||
480 | if (ns != sk->sk_state) | ||
481 | dccp_set_state(sk, ns); | ||
482 | |||
483 | return next & DCCP_ACTION_FIN; | ||
484 | } | ||
485 | |||
/*
 * dccp_close  -  close() entry point for DCCP sockets
 * @sk:      socket being closed
 * @timeout: linger time set up by the socket layer (SO_LINGER)
 *
 * Flushes unread data, sends a CLOSE/CLOSEREQ if the state machine
 * requires one, then orphans the socket and either destroys it now or
 * leaves the protocol timers to finish the close.
 */
void dccp_close(struct sock *sk, long timeout)
{
	struct sk_buff *skb;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	/*
	 * We need to flush the recv. buffs. We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	/* FIXME: check for unread data */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		__kfree_skb(skb);
	}

	if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (dccp_close_state(sk)) {
		/* The state machine asked for a CLOSE/CLOSEREQ to go out. */
		dccp_send_close(sk, 1);
	}

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	/*
	 * It is the last release_sock in its life. It will remove backlog.
	 */
	release_sock(sk);
	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	BUG_TRAP(!sock_owned_by_user(sk));

	sock_hold(sk);
	sock_orphan(sk);

	/*
	 * The last release_sock may have processed the CLOSE or RESET
	 * packet moving sock to CLOSED state, if not we have to fire
	 * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
	 * in draft-ietf-dccp-spec-11. -acme
	 */
	if (sk->sk_state == DCCP_CLOSING) {
		/* FIXME: should start at 2 * RTT */
		/* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
					  inet_csk(sk)->icsk_rto,
					  DCCP_RTO_MAX);
#if 0
		/* Yeah, we should use sk->sk_prot->orphan_count, etc */
		dccp_set_state(sk, DCCP_CLOSED);
#endif
	}

	atomic_inc(sk->sk_prot->orphan_count);
	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}
566 | |||
/* FIXME: shutdown() is not implemented yet; this stub only logs entry. */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("entry\n");
}
571 | |||
/* Socket-layer (struct socket) operations for PF_INET SOCK_DCCP sockets. */
static struct proto_ops inet_dccp_ops = {
	.family = PF_INET,
	.owner = THIS_MODULE,
	.release = inet_release,
	.bind = inet_bind,
	.connect = inet_stream_connect,
	.socketpair = sock_no_socketpair,
	.accept = inet_accept,
	.getname = inet_getname,
	/* FIXME: work on tcp_poll to rename it to inet_csk_poll */
	.poll = dccp_poll,
	.ioctl = inet_ioctl,
	/* FIXME: work on inet_listen to rename it to sock_common_listen */
	.listen = inet_dccp_listen,
	.shutdown = inet_shutdown,
	.setsockopt = sock_common_setsockopt,
	.getsockopt = sock_common_getsockopt,
	.sendmsg = inet_sendmsg,
	.recvmsg = sock_common_recvmsg,
	.mmap = sock_no_mmap,
	.sendpage = sock_no_sendpage,
};
594 | |||
/* Defined in the inet core (af_inet.c). */
extern struct net_proto_family inet_family_ops;

/* Registration record wiring SOCK_DCCP/IPPROTO_DCCP to this protocol. */
static struct inet_protosw dccp_v4_protosw = {
	.type = SOCK_DCCP,
	.protocol = IPPROTO_DCCP,
	.prot = &dccp_v4_prot,
	.ops = &inet_dccp_ops,
	.capability = -1,
	.no_check = 0,
	.flags = 0,
};
606 | |||
/*
 * This is the global socket data structure used for responding to
 * the Out-of-the-blue (OOTB) packets. A control sock will be created
 * for this socket at the initialization time.
 */
struct socket *dccp_ctl_socket;

/* __initdata: the message is only needed during module init. */
static char dccp_ctl_socket_err_msg[] __initdata =
	KERN_ERR "DCCP: Failed to create the control socket.\n";
616 | |||
617 | static int __init dccp_ctl_sock_init(void) | ||
618 | { | ||
619 | int rc = sock_create_kern(PF_INET, SOCK_DCCP, IPPROTO_DCCP, | ||
620 | &dccp_ctl_socket); | ||
621 | if (rc < 0) | ||
622 | printk(dccp_ctl_socket_err_msg); | ||
623 | else { | ||
624 | dccp_ctl_socket->sk->sk_allocation = GFP_ATOMIC; | ||
625 | inet_sk(dccp_ctl_socket->sk)->uc_ttl = -1; | ||
626 | |||
627 | /* Unhash it so that IP input processing does not even | ||
628 | * see it, we do not wish this socket to see incoming | ||
629 | * packets. | ||
630 | */ | ||
631 | dccp_ctl_socket->sk->sk_prot->unhash(dccp_ctl_socket->sk); | ||
632 | } | ||
633 | |||
634 | return rc; | ||
635 | } | ||
636 | |||
#ifdef CONFIG_IP_DCCP_UNLOAD_HACK
/* Release the OOTB control socket; only needed to support module unload. */
void dccp_ctl_sock_exit(void)
{
	if (dccp_ctl_socket != NULL) {
		sock_release(dccp_ctl_socket);
		dccp_ctl_socket = NULL;
	}
}

EXPORT_SYMBOL_GPL(dccp_ctl_sock_exit);
#endif
648 | |||
649 | static int __init init_dccp_v4_mibs(void) | ||
650 | { | ||
651 | int rc = -ENOMEM; | ||
652 | |||
653 | dccp_statistics[0] = alloc_percpu(struct dccp_mib); | ||
654 | if (dccp_statistics[0] == NULL) | ||
655 | goto out; | ||
656 | |||
657 | dccp_statistics[1] = alloc_percpu(struct dccp_mib); | ||
658 | if (dccp_statistics[1] == NULL) | ||
659 | goto out_free_one; | ||
660 | |||
661 | rc = 0; | ||
662 | out: | ||
663 | return rc; | ||
664 | out_free_one: | ||
665 | free_percpu(dccp_statistics[0]); | ||
666 | dccp_statistics[0] = NULL; | ||
667 | goto out; | ||
668 | |||
669 | } | ||
670 | |||
/* Module parameter: override for the established hash size (0 = auto). */
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
/* Non-zero enables dccp_pr_debug() output. */
int dccp_debug;
module_param(dccp_debug, int, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
#endif
680 | |||
681 | static int __init dccp_init(void) | ||
682 | { | ||
683 | unsigned long goal; | ||
684 | int ehash_order, bhash_order, i; | ||
685 | int rc = proto_register(&dccp_v4_prot, 1); | ||
686 | |||
687 | if (rc) | ||
688 | goto out; | ||
689 | |||
690 | dccp_hashinfo.bind_bucket_cachep = | ||
691 | kmem_cache_create("dccp_bind_bucket", | ||
692 | sizeof(struct inet_bind_bucket), 0, | ||
693 | SLAB_HWCACHE_ALIGN, NULL, NULL); | ||
694 | if (!dccp_hashinfo.bind_bucket_cachep) | ||
695 | goto out_proto_unregister; | ||
696 | |||
697 | /* | ||
698 | * Size and allocate the main established and bind bucket | ||
699 | * hash tables. | ||
700 | * | ||
701 | * The methodology is similar to that of the buffer cache. | ||
702 | */ | ||
703 | if (num_physpages >= (128 * 1024)) | ||
704 | goal = num_physpages >> (21 - PAGE_SHIFT); | ||
705 | else | ||
706 | goal = num_physpages >> (23 - PAGE_SHIFT); | ||
707 | |||
708 | if (thash_entries) | ||
709 | goal = (thash_entries * | ||
710 | sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT; | ||
711 | for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++) | ||
712 | ; | ||
713 | do { | ||
714 | dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE / | ||
715 | sizeof(struct inet_ehash_bucket); | ||
716 | dccp_hashinfo.ehash_size >>= 1; | ||
717 | while (dccp_hashinfo.ehash_size & | ||
718 | (dccp_hashinfo.ehash_size - 1)) | ||
719 | dccp_hashinfo.ehash_size--; | ||
720 | dccp_hashinfo.ehash = (struct inet_ehash_bucket *) | ||
721 | __get_free_pages(GFP_ATOMIC, ehash_order); | ||
722 | } while (!dccp_hashinfo.ehash && --ehash_order > 0); | ||
723 | |||
724 | if (!dccp_hashinfo.ehash) { | ||
725 | printk(KERN_CRIT "Failed to allocate DCCP " | ||
726 | "established hash table\n"); | ||
727 | goto out_free_bind_bucket_cachep; | ||
728 | } | ||
729 | |||
730 | for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) { | ||
731 | rwlock_init(&dccp_hashinfo.ehash[i].lock); | ||
732 | INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain); | ||
733 | } | ||
734 | |||
735 | bhash_order = ehash_order; | ||
736 | |||
737 | do { | ||
738 | dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE / | ||
739 | sizeof(struct inet_bind_hashbucket); | ||
740 | if ((dccp_hashinfo.bhash_size > (64 * 1024)) && | ||
741 | bhash_order > 0) | ||
742 | continue; | ||
743 | dccp_hashinfo.bhash = (struct inet_bind_hashbucket *) | ||
744 | __get_free_pages(GFP_ATOMIC, bhash_order); | ||
745 | } while (!dccp_hashinfo.bhash && --bhash_order >= 0); | ||
746 | |||
747 | if (!dccp_hashinfo.bhash) { | ||
748 | printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n"); | ||
749 | goto out_free_dccp_ehash; | ||
750 | } | ||
751 | |||
752 | for (i = 0; i < dccp_hashinfo.bhash_size; i++) { | ||
753 | spin_lock_init(&dccp_hashinfo.bhash[i].lock); | ||
754 | INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain); | ||
755 | } | ||
756 | |||
757 | if (init_dccp_v4_mibs()) | ||
758 | goto out_free_dccp_bhash; | ||
759 | |||
760 | rc = -EAGAIN; | ||
761 | if (inet_add_protocol(&dccp_protocol, IPPROTO_DCCP)) | ||
762 | goto out_free_dccp_v4_mibs; | ||
763 | |||
764 | inet_register_protosw(&dccp_v4_protosw); | ||
765 | |||
766 | rc = dccp_ctl_sock_init(); | ||
767 | if (rc) | ||
768 | goto out_unregister_protosw; | ||
769 | out: | ||
770 | return rc; | ||
771 | out_unregister_protosw: | ||
772 | inet_unregister_protosw(&dccp_v4_protosw); | ||
773 | inet_del_protocol(&dccp_protocol, IPPROTO_DCCP); | ||
774 | out_free_dccp_v4_mibs: | ||
775 | free_percpu(dccp_statistics[0]); | ||
776 | free_percpu(dccp_statistics[1]); | ||
777 | dccp_statistics[0] = dccp_statistics[1] = NULL; | ||
778 | out_free_dccp_bhash: | ||
779 | free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order); | ||
780 | dccp_hashinfo.bhash = NULL; | ||
781 | out_free_dccp_ehash: | ||
782 | free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order); | ||
783 | dccp_hashinfo.ehash = NULL; | ||
784 | out_free_bind_bucket_cachep: | ||
785 | kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); | ||
786 | dccp_hashinfo.bind_bucket_cachep = NULL; | ||
787 | out_proto_unregister: | ||
788 | proto_unregister(&dccp_v4_prot); | ||
789 | goto out; | ||
790 | } | ||
791 | |||
/* __exitdata: only needed during module unload. */
static const char dccp_del_proto_err_msg[] __exitdata =
	KERN_ERR "can't remove dccp net_protocol\n";

/* Module unload: undo everything dccp_init() set up, in reverse order. */
static void __exit dccp_fini(void)
{
	inet_unregister_protosw(&dccp_v4_protosw);

	if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0)
		printk(dccp_del_proto_err_msg);

	free_percpu(dccp_statistics[0]);
	free_percpu(dccp_statistics[1]);
	/*
	 * NOTE(review): dccp_init() allocated these tables by page order,
	 * but here the order is recomputed from {b,e}hash_size.  Since
	 * ehash_size was halved and rounded down to a power of two after
	 * sizing, the recomputed order may be smaller than the one
	 * allocated — verify no pages are leaked on unload.
	 */
	free_pages((unsigned long)dccp_hashinfo.bhash,
		   get_order(dccp_hashinfo.bhash_size *
			     sizeof(struct inet_bind_hashbucket)));
	free_pages((unsigned long)dccp_hashinfo.ehash,
		   get_order(dccp_hashinfo.ehash_size *
			     sizeof(struct inet_ehash_bucket)));
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	proto_unregister(&dccp_v4_prot);
}
813 | |||
module_init(dccp_init);
module_exit(dccp_fini);

/*
 * __stringify doesn't like enums, so use SOCK_DCCP (6) and IPPROTO_DCCP (33)
 * values directly.  Also cover the case where the protocol is not specified,
 * i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP
 */
MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-33-type-6");
MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-0-type-6");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");