/*
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *  This program is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU General Public License version 2 as
 *  published by the Free Software Foundation.
 */

#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/random.h>
#include <net/checksum.h>

#include <net/inet_sock.h>
#include <net/sock.h>
#include <net/xfrm.h>

#include <asm/ioctls.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/delay.h>
#include <linux/poll.h>

#include "ccid.h"
#include "dccp.h"
#include "feat.h"

DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

atomic_t dccp_orphan_count = ATOMIC_INIT(0);

EXPORT_SYMBOL_GPL(dccp_orphan_count);

struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
        .lhash_lock     = RW_LOCK_UNLOCKED,
        .lhash_users    = ATOMIC_INIT(0),
        .lhash_wait     = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
};

EXPORT_SYMBOL_GPL(dccp_hashinfo);

/* the maximum queue length for tx in packets. 0 is no limit */
int sysctl_dccp_tx_qlen __read_mostly = 5;
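/*
 * Tuning sketch (illustrative only): this limit is exposed through the DCCP
 * sysctl table, so it can normally be adjusted at run time, e.g. with
 *
 *      sysctl -w net.dccp.default.tx_qlen=10
 *
 * The exact "net.dccp.default" path is an assumption based on the usual DCCP
 * sysctl layout; a value of 0 removes the cap on the transmit queue length.
 */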
void dccp_set_state(struct sock *sk, const int state)
{
        const int oldstate = sk->sk_state;

        dccp_pr_debug("%s(%p) %s --> %s\n", dccp_role(sk), sk,
                      dccp_state_name(oldstate), dccp_state_name(state));
        WARN_ON(state == oldstate);

        switch (state) {
        case DCCP_OPEN:
                if (oldstate != DCCP_OPEN)
                        DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
                break;

        case DCCP_CLOSED:
                if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
                    oldstate == DCCP_CLOSING)
                        DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

                sk->sk_prot->unhash(sk);
                if (inet_csk(sk)->icsk_bind_hash != NULL &&
                    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
                        inet_put_port(sk);
                /* fall through */
        default:
                if (oldstate == DCCP_OPEN)
                        DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
        }

        /* Change state AFTER socket is unhashed to avoid closed
         * socket sitting in hash tables.
         */
        sk->sk_state = state;
}

EXPORT_SYMBOL_GPL(dccp_set_state);

static void dccp_finish_passive_close(struct sock *sk)
{
        switch (sk->sk_state) {
        case DCCP_PASSIVE_CLOSE:
                /* Node (client or server) has received Close packet. */
                dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
                dccp_set_state(sk, DCCP_CLOSED);
                break;
        case DCCP_PASSIVE_CLOSEREQ:
                /*
                 * Client received CloseReq. We set the `active' flag so that
                 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
                 */
                dccp_send_close(sk, 1);
                dccp_set_state(sk, DCCP_CLOSING);
        }
}

void dccp_done(struct sock *sk)
{
        dccp_set_state(sk, DCCP_CLOSED);
        dccp_clear_xmit_timers(sk);

        sk->sk_shutdown = SHUTDOWN_MASK;

        if (!sock_flag(sk, SOCK_DEAD))
                sk->sk_state_change(sk);
        else
                inet_csk_destroy_sock(sk);
}

EXPORT_SYMBOL_GPL(dccp_done);

const char *dccp_packet_name(const int type)
{
        static const char *dccp_packet_names[] = {
                [DCCP_PKT_REQUEST]  = "REQUEST",
                [DCCP_PKT_RESPONSE] = "RESPONSE",
                [DCCP_PKT_DATA]     = "DATA",
                [DCCP_PKT_ACK]      = "ACK",
                [DCCP_PKT_DATAACK]  = "DATAACK",
                [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
                [DCCP_PKT_CLOSE]    = "CLOSE",
                [DCCP_PKT_RESET]    = "RESET",
                [DCCP_PKT_SYNC]     = "SYNC",
                [DCCP_PKT_SYNCACK]  = "SYNCACK",
        };

        if (type >= DCCP_NR_PKT_TYPES)
                return "INVALID";
        else
                return dccp_packet_names[type];
}

EXPORT_SYMBOL_GPL(dccp_packet_name);

const char *dccp_state_name(const int state)
{
        static const char *dccp_state_names[] = {
                [DCCP_OPEN]             = "OPEN",
                [DCCP_REQUESTING]       = "REQUESTING",
                [DCCP_PARTOPEN]         = "PARTOPEN",
                [DCCP_LISTEN]           = "LISTEN",
                [DCCP_RESPOND]          = "RESPOND",
                [DCCP_CLOSING]          = "CLOSING",
                [DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
                [DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
                [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
                [DCCP_TIME_WAIT]        = "TIME_WAIT",
                [DCCP_CLOSED]           = "CLOSED",
        };

        if (state >= DCCP_MAX_STATES)
                return "INVALID STATE!";
        else
                return dccp_state_names[state];
}

EXPORT_SYMBOL_GPL(dccp_state_name);

int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct dccp_minisock *dmsk = dccp_msk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);

        dccp_minisock_init(&dp->dccps_minisock);

        icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
        icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
        sk->sk_state            = DCCP_CLOSED;
        sk->sk_write_space      = dccp_write_space;
        icsk->icsk_sync_mss     = dccp_sync_mss;
        dp->dccps_mss_cache     = 536;
        dp->dccps_rate_last     = jiffies;
        dp->dccps_role          = DCCP_ROLE_UNDEFINED;
        dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
        dp->dccps_l_ack_ratio   = dp->dccps_r_ack_ratio = 1;

        dccp_init_xmit_timers(sk);

        INIT_LIST_HEAD(&dp->dccps_featneg);
        /*
         * FIXME: We're hardcoding the CCID, and doing this at this point makes
         * the listening (master) sock get CCID control blocks, which is not
         * necessary, but for now, to not mess with the test userspace apps,
         * lets leave it here, later the real solution is to do this in a
         * setsockopt(CCIDs-I-want/accept). -acme
         */
        if (likely(ctl_sock_initialized)) {
                int rc = dccp_feat_init(sk);

                if (rc)
                        return rc;

                if (dmsk->dccpms_send_ack_vector) {
                        dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
                        if (dp->dccps_hc_rx_ackvec == NULL)
                                return -ENOMEM;
                }
                dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
                                                      sk, GFP_KERNEL);
                dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
                                                      sk, GFP_KERNEL);
                if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
                             dp->dccps_hc_tx_ccid == NULL)) {
                        ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
                        ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
                        if (dmsk->dccpms_send_ack_vector) {
                                dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
                                dp->dccps_hc_rx_ackvec = NULL;
                        }
                        dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
                        return -ENOMEM;
                }
        } else {
                /* control socket doesn't need feat nego */
                INIT_LIST_HEAD(&dmsk->dccpms_pending);
                INIT_LIST_HEAD(&dmsk->dccpms_conf);
        }

        return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);

void dccp_destroy_sock(struct sock *sk)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct dccp_minisock *dmsk = dccp_msk(sk);

        /*
         * DCCP doesn't use sk_write_queue, just sk_send_head
         * for retransmissions
         */
        if (sk->sk_send_head != NULL) {
                kfree_skb(sk->sk_send_head);
                sk->sk_send_head = NULL;
        }

        /* Clean up a referenced DCCP bind bucket. */
        if (inet_csk(sk)->icsk_bind_hash != NULL)
                inet_put_port(sk);

        kfree(dp->dccps_service_list);
        dp->dccps_service_list = NULL;

        if (dmsk->dccpms_send_ack_vector) {
                dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
                dp->dccps_hc_rx_ackvec = NULL;
        }
        ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
        ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
        dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;

        /* clean up feature negotiation state */
        dccp_feat_list_purge(&dp->dccps_featneg);
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);

static inline int dccp_listen_start(struct sock *sk, int backlog)
{
        struct dccp_sock *dp = dccp_sk(sk);

        dp->dccps_role = DCCP_ROLE_LISTEN;
        return inet_csk_listen_start(sk, backlog);
}

static inline int dccp_need_reset(int state)
{
        return state != DCCP_CLOSED && state != DCCP_LISTEN &&
               state != DCCP_REQUESTING;
}

int dccp_disconnect(struct sock *sk, int flags)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct inet_sock *inet = inet_sk(sk);
        int err = 0;
        const int old_state = sk->sk_state;

        if (old_state != DCCP_CLOSED)
                dccp_set_state(sk, DCCP_CLOSED);

        /*
         * This corresponds to the ABORT function of RFC793, sec. 3.8
         * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
         */
        if (old_state == DCCP_LISTEN) {
                inet_csk_listen_stop(sk);
        } else if (dccp_need_reset(old_state)) {
                dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
                sk->sk_err = ECONNRESET;
        } else if (old_state == DCCP_REQUESTING)
                sk->sk_err = ECONNRESET;

        dccp_clear_xmit_timers(sk);

        __skb_queue_purge(&sk->sk_receive_queue);
        __skb_queue_purge(&sk->sk_write_queue);
        if (sk->sk_send_head != NULL) {
                __kfree_skb(sk->sk_send_head);
                sk->sk_send_head = NULL;
        }

        inet->dport = 0;

        if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
                inet_reset_saddr(sk);

        sk->sk_shutdown = 0;
        sock_reset_flag(sk, SOCK_DONE);

        icsk->icsk_backoff = 0;
        inet_csk_delack_init(sk);
        __sk_dst_reset(sk);

        WARN_ON(inet->num && !icsk->icsk_bind_hash);

        sk->sk_error_report(sk);
        return err;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);

/*
 * Wait for a DCCP event.
 *
 * Note that we don't need to lock the socket, as the upper poll layers
 * take care of normal races (between the test and the event) and we don't
 * go look at any of the socket buffers directly.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
                       poll_table *wait)
{
        unsigned int mask;
        struct sock *sk = sock->sk;

        poll_wait(file, sk->sk_sleep, wait);
        if (sk->sk_state == DCCP_LISTEN)
                return inet_csk_listen_poll(sk);

        /* Socket is not locked. We are protected from async events
         * by poll logic and correct handling of state changes
         * made by other threads is impossible in any case.
         */

        mask = 0;
        if (sk->sk_err)
                mask = POLLERR;

        if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
                mask |= POLLHUP;
        if (sk->sk_shutdown & RCV_SHUTDOWN)
                mask |= POLLIN | POLLRDNORM | POLLRDHUP;

        /* Connected? */
        if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
                if (atomic_read(&sk->sk_rmem_alloc) > 0)
                        mask |= POLLIN | POLLRDNORM;

                if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
                        if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
                                mask |= POLLOUT | POLLWRNORM;
                        } else { /* send SIGIO later */
                                set_bit(SOCK_ASYNC_NOSPACE,
                                        &sk->sk_socket->flags);
                                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

                                /* Race breaker. If space is freed after
                                 * wspace test but before the flags are set,
                                 * IO signal will be lost.
                                 */
                                if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
                                        mask |= POLLOUT | POLLWRNORM;
                        }
                }
        }
        return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);

int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
        int rc = -ENOTCONN;

        lock_sock(sk);

        if (sk->sk_state == DCCP_LISTEN)
                goto out;

        switch (cmd) {
        case SIOCINQ: {
                struct sk_buff *skb;
                unsigned long amount = 0;

                skb = skb_peek(&sk->sk_receive_queue);
                if (skb != NULL) {
                        /*
                         * We will only return the amount of this packet since
                         * that is all that will be read.
                         */
                        amount = skb->len;
                }
                rc = put_user(amount, (int __user *)arg);
                break;
        }
        default:
                rc = -ENOIOCTLCMD;
                break;
        }
out:
        release_sock(sk);
        return rc;
}

EXPORT_SYMBOL_GPL(dccp_ioctl);

static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
                                   char __user *optval, int optlen)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct dccp_service_list *sl = NULL;

        if (service == DCCP_SERVICE_INVALID_VALUE ||
            optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
                return -EINVAL;

        if (optlen > sizeof(service)) {
                sl = kmalloc(optlen, GFP_KERNEL);
                if (sl == NULL)
                        return -ENOMEM;

                sl->dccpsl_nr = optlen / sizeof(u32) - 1;
                if (copy_from_user(sl->dccpsl_list,
                                   optval + sizeof(service),
                                   optlen - sizeof(service)) ||
                    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
                        kfree(sl);
                        return -EFAULT;
                }
        }

        lock_sock(sk);
        dp->dccps_service = service;

        kfree(dp->dccps_service_list);

        dp->dccps_service_list = sl;
        release_sock(sk);
        return 0;
}

/* byte 1 is feature. the rest is the preference list */
static int dccp_setsockopt_change(struct sock *sk, int type,
                                  struct dccp_so_feat __user *optval)
{
        struct dccp_so_feat opt;
        u8 *val;
        int rc;

        if (copy_from_user(&opt, optval, sizeof(opt)))
                return -EFAULT;
        /*
         * rfc4340: 6.1. Change Options
         */
        if (opt.dccpsf_len < 1)
                return -EINVAL;

        val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
        if (val == NULL)
                return -ENOMEM;

        if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
                rc = -EFAULT;
                goto out_free_val;
        }

        rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
                              val, opt.dccpsf_len, GFP_KERNEL);
        if (rc)
                goto out_free_val;

out:
        return rc;

out_free_val:
        kfree(val);
        goto out;
}

static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
                              char __user *optval, int optlen)
{
        struct dccp_sock *dp = dccp_sk(sk);
        int val, err = 0;

        if (optlen < sizeof(int))
                return -EINVAL;

        if (get_user(val, (int __user *)optval))
                return -EFAULT;

        if (optname == DCCP_SOCKOPT_SERVICE)
                return dccp_setsockopt_service(sk, val, optval, optlen);

        lock_sock(sk);
        switch (optname) {
        case DCCP_SOCKOPT_PACKET_SIZE:
                DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
                err = 0;
                break;
        case DCCP_SOCKOPT_CHANGE_L:
                if (optlen != sizeof(struct dccp_so_feat))
                        err = -EINVAL;
                else
                        err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
                                                     (struct dccp_so_feat __user *)
                                                     optval);
                break;
        case DCCP_SOCKOPT_CHANGE_R:
                if (optlen != sizeof(struct dccp_so_feat))
                        err = -EINVAL;
                else
                        err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
                                                     (struct dccp_so_feat __user *)
                                                     optval);
                break;
        case DCCP_SOCKOPT_SERVER_TIMEWAIT:
                if (dp->dccps_role != DCCP_ROLE_SERVER)
                        err = -EOPNOTSUPP;
                else
                        dp->dccps_server_timewait = (val != 0);
                break;
        case DCCP_SOCKOPT_SEND_CSCOV:   /* sender side, RFC 4340, sec. 9.2 */
                if (val < 0 || val > 15)
                        err = -EINVAL;
                else
                        dp->dccps_pcslen = val;
                break;
        case DCCP_SOCKOPT_RECV_CSCOV:   /* receiver side, RFC 4340 sec. 9.2.1 */
                if (val < 0 || val > 15)
                        err = -EINVAL;
                else {
                        dp->dccps_pcrlen = val;
                        /* FIXME: add feature negotiation,
                         * ChangeL(MinimumChecksumCoverage, val) */
                }
                break;
        default:
                err = -ENOPROTOOPT;
                break;
        }

        release_sock(sk);
        return err;
}
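/*
 * Note on the CSCOV values accepted above (RFC 4340, sec. 9.2 and 9.2.1): a
 * value of 0 (the default) means the checksum covers the entire packet, while
 * a value n in 1..15 restricts coverage to the header plus the initial
 * (n - 1) * 4 bytes of application data. RECV_CSCOV is the receiver-side
 * counterpart, constraining the coverage of incoming packets.
 */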
int dccp_setsockopt(struct sock *sk, int level, int optname,
                    char __user *optval, int optlen)
{
        if (level != SOL_DCCP)
                return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
                                                             optname, optval,
                                                             optlen);
        return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_setsockopt);

#ifdef CONFIG_COMPAT
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
                           char __user *optval, int optlen)
{
        if (level != SOL_DCCP)
                return inet_csk_compat_setsockopt(sk, level, optname,
                                                  optval, optlen);
        return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif
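/*
 * Usage sketch (userspace side, illustrative only): the service code is
 * normally set before connect() on the client or before listen() on the
 * server, assuming the application has <linux/dccp.h> available:
 *
 *      int fd = socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP);
 *      __be32 service = htonl(42);     // hypothetical service code
 *      setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
 *                 &service, sizeof(service));
 *
 * Up to DCCP_SERVICE_LIST_MAX_LEN additional service codes may follow the
 * first one in the same option buffer, as parsed by
 * dccp_setsockopt_service() above.
 */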
static int dccp_getsockopt_service(struct sock *sk, int len,
                                   __be32 __user *optval,
                                   int __user *optlen)
{
        const struct dccp_sock *dp = dccp_sk(sk);
        const struct dccp_service_list *sl;
        int err = -ENOENT, slen = 0, total_len = sizeof(u32);

        lock_sock(sk);
        if ((sl = dp->dccps_service_list) != NULL) {
                slen = sl->dccpsl_nr * sizeof(u32);
                total_len += slen;
        }

        err = -EINVAL;
        if (total_len > len)
                goto out;

        err = 0;
        if (put_user(total_len, optlen) ||
            put_user(dp->dccps_service, optval) ||
            (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
                err = -EFAULT;
out:
        release_sock(sk);
        return err;
}

static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
                              char __user *optval, int __user *optlen)
{
        struct dccp_sock *dp;
        int val, len;

        if (get_user(len, optlen))
                return -EFAULT;

        if (len < (int)sizeof(int))
                return -EINVAL;

        dp = dccp_sk(sk);

        switch (optname) {
        case DCCP_SOCKOPT_PACKET_SIZE:
                DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
                return 0;
        case DCCP_SOCKOPT_SERVICE:
                return dccp_getsockopt_service(sk, len,
                                               (__be32 __user *)optval, optlen);
        case DCCP_SOCKOPT_GET_CUR_MPS:
                val = dp->dccps_mss_cache;
                break;
        case DCCP_SOCKOPT_AVAILABLE_CCIDS:
                return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
        case DCCP_SOCKOPT_SERVER_TIMEWAIT:
                val = dp->dccps_server_timewait;
                break;
        case DCCP_SOCKOPT_SEND_CSCOV:
                val = dp->dccps_pcslen;
                break;
        case DCCP_SOCKOPT_RECV_CSCOV:
                val = dp->dccps_pcrlen;
                break;
        case 128 ... 191:
                return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
                                             len, (u32 __user *)optval, optlen);
        case 192 ... 255:
                return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
                                             len, (u32 __user *)optval, optlen);
        default:
                return -ENOPROTOOPT;
        }

        len = sizeof(val);
        if (put_user(len, optlen) || copy_to_user(optval, &val, len))
                return -EFAULT;

        return 0;
}

int dccp_getsockopt(struct sock *sk, int level, int optname,
                    char __user *optval, int __user *optlen)
{
        if (level != SOL_DCCP)
                return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
                                                             optname, optval,
                                                             optlen);
        return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_getsockopt);

#ifdef CONFIG_COMPAT
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
                           char __user *optval, int __user *optlen)
{
        if (level != SOL_DCCP)
                return inet_csk_compat_getsockopt(sk, level, optname,
                                                  optval, optlen);
        return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif
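/*
 * Usage sketch (userspace side, illustrative only): the maximum packet size
 * cached in dccps_mss_cache can be queried once a connection is set up:
 *
 *      int mps;
 *      socklen_t len = sizeof(mps);
 *      getsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_GET_CUR_MPS, &mps, &len);
 *
 * This is the largest payload that dccp_sendmsg() below will accept in a
 * single write.
 */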
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len)
{
        const struct dccp_sock *dp = dccp_sk(sk);
        const int flags = msg->msg_flags;
        const int noblock = flags & MSG_DONTWAIT;
        struct sk_buff *skb;
        int rc, size;
        long timeo;

        if (len > dp->dccps_mss_cache)
                return -EMSGSIZE;

        lock_sock(sk);

        if (sysctl_dccp_tx_qlen &&
            (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
                rc = -EAGAIN;
                goto out_release;
        }

        timeo = sock_sndtimeo(sk, noblock);

        /*
         * We have to use sk_stream_wait_connect here to set sk_write_pending,
         * so that the trick in dccp_rcv_request_sent_state_process() works.
         */
        /* Wait for a connection to finish. */
        if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
                if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
                        goto out_release;

        size = sk->sk_prot->max_header + len;
        release_sock(sk);

        skb = sock_alloc_send_skb(sk, size, noblock, &rc);
        lock_sock(sk);
        if (skb == NULL)
                goto out_release;

        skb_reserve(skb, sk->sk_prot->max_header);
        rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
        if (rc != 0)
                goto out_discard;

        skb_queue_tail(&sk->sk_write_queue, skb);
        dccp_write_xmit(sk, 0);
out_release:
        release_sock(sk);
        return rc == 0 ? len : rc;
out_discard:
        kfree_skb(skb);
        goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);
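/*
 * Note: unlike TCP, each write becomes exactly one DCCP-Data packet and is
 * never fragmented or coalesced by the stack, which is why dccp_sendmsg()
 * rejects anything larger than the cached MPS with -EMSGSIZE and queues at
 * most sysctl_dccp_tx_qlen packets for the CCID to pace out.
 */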
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len, int nonblock, int flags, int *addr_len)
{
        const struct dccp_hdr *dh;
        long timeo;

        lock_sock(sk);

        if (sk->sk_state == DCCP_LISTEN) {
                len = -ENOTCONN;
                goto out;
        }

        timeo = sock_rcvtimeo(sk, nonblock);

        do {
                struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

                if (skb == NULL)
                        goto verify_sock_status;

                dh = dccp_hdr(skb);

                switch (dh->dccph_type) {
                case DCCP_PKT_DATA:
                case DCCP_PKT_DATAACK:
                        goto found_ok_skb;

                case DCCP_PKT_CLOSE:
                case DCCP_PKT_CLOSEREQ:
                        if (!(flags & MSG_PEEK))
                                dccp_finish_passive_close(sk);
                        /* fall through */
                case DCCP_PKT_RESET:
                        dccp_pr_debug("found fin (%s) ok!\n",
                                      dccp_packet_name(dh->dccph_type));
                        len = 0;
                        goto found_fin_ok;
                default:
                        dccp_pr_debug("packet_type=%s\n",
                                      dccp_packet_name(dh->dccph_type));
                        sk_eat_skb(sk, skb, 0);
                }
verify_sock_status:
                if (sock_flag(sk, SOCK_DONE)) {
                        len = 0;
                        break;
                }

                if (sk->sk_err) {
                        len = sock_error(sk);
                        break;
                }

                if (sk->sk_shutdown & RCV_SHUTDOWN) {
                        len = 0;
                        break;
                }

                if (sk->sk_state == DCCP_CLOSED) {
                        if (!sock_flag(sk, SOCK_DONE)) {
                                /* This occurs when user tries to read
                                 * from never connected socket.
                                 */
                                len = -ENOTCONN;
                                break;
                        }
                        len = 0;
                        break;
                }

                if (!timeo) {
                        len = -EAGAIN;
                        break;
                }

                if (signal_pending(current)) {
                        len = sock_intr_errno(timeo);
                        break;
                }

                sk_wait_data(sk, &timeo);
                continue;
        found_ok_skb:
                if (len > skb->len)
                        len = skb->len;
                else if (len < skb->len)
                        msg->msg_flags |= MSG_TRUNC;

                if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
                        /* Exception. Bailout! */
                        len = -EFAULT;
                        break;
                }
        found_fin_ok:
                if (!(flags & MSG_PEEK))
                        sk_eat_skb(sk, skb, 0);
                break;
        } while (1);
out:
        release_sock(sk);
        return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);

int inet_dccp_listen(struct socket *sock, int backlog)
{
        struct sock *sk = sock->sk;
        unsigned char old_state;
        int err;

        lock_sock(sk);

        err = -EINVAL;
        if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
                goto out;

        old_state = sk->sk_state;
        if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
                goto out;

        /* Really, if the socket is already in listen state
         * we can only allow the backlog to be adjusted.
         */
        if (old_state != DCCP_LISTEN) {
                /*
                 * FIXME: here it probably should be sk->sk_prot->listen_start
                 * see tcp_listen_start
                 */
                err = dccp_listen_start(sk, backlog);
                if (err)
                        goto out;
        }
        sk->sk_max_ack_backlog = backlog;
        err = 0;

out:
        release_sock(sk);
        return err;
}

EXPORT_SYMBOL_GPL(inet_dccp_listen);
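/*
 * Usage sketch (userspace side, illustrative only): a minimal passive DCCP
 * socket, assuming a service code has been set as shown earlier:
 *
 *      int fd = socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP);
 *      struct sockaddr_in sa = { .sin_family = AF_INET,
 *                                .sin_port   = htons(5001) }; // example port
 *      bind(fd, (struct sockaddr *)&sa, sizeof(sa));
 *      listen(fd, 5);  // ends up in inet_dccp_listen() above
 */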
static void dccp_terminate_connection(struct sock *sk)
{
        u8 next_state = DCCP_CLOSED;

        switch (sk->sk_state) {
        case DCCP_PASSIVE_CLOSE:
        case DCCP_PASSIVE_CLOSEREQ:
                dccp_finish_passive_close(sk);
                break;
        case DCCP_PARTOPEN:
                dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
                inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
                /* fall through */
        case DCCP_OPEN:
                dccp_send_close(sk, 1);

                if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
                    !dccp_sk(sk)->dccps_server_timewait)
                        next_state = DCCP_ACTIVE_CLOSEREQ;
                else
                        next_state = DCCP_CLOSING;
                /* fall through */
        default:
                dccp_set_state(sk, next_state);
        }
}

void dccp_close(struct sock *sk, long timeout)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct sk_buff *skb;
        u32 data_was_unread = 0;
        int state;

        lock_sock(sk);

        sk->sk_shutdown = SHUTDOWN_MASK;

        if (sk->sk_state == DCCP_LISTEN) {
                dccp_set_state(sk, DCCP_CLOSED);

                /* Special case. */
                inet_csk_listen_stop(sk);

                goto adjudge_to_death;
        }

        sk_stop_timer(sk, &dp->dccps_xmit_timer);

        /*
         * We need to flush the recv. buffs. We do this only on the
         * descriptor close, not protocol-sourced closes, because the
         * reader process may not have drained the data yet!
         */
        while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                data_was_unread += skb->len;
                __kfree_skb(skb);
        }

        if (data_was_unread) {
                /* Unread data was tossed, send an appropriate Reset Code */
                DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
                dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
                dccp_set_state(sk, DCCP_CLOSED);
        } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
                /* Check zero linger _after_ checking for unread data. */
                sk->sk_prot->disconnect(sk, 0);
        } else if (sk->sk_state != DCCP_CLOSED) {
                dccp_terminate_connection(sk);
        }

        sk_stream_wait_close(sk, timeout);

adjudge_to_death:
        state = sk->sk_state;
        sock_hold(sk);
        sock_orphan(sk);
        atomic_inc(sk->sk_prot->orphan_count);

        /*
         * It is the last release_sock in its life. It will remove backlog.
         */
        release_sock(sk);
        /*
         * Now socket is owned by kernel and we acquire BH lock
         * to finish close. No need to check for user refs.
         */
        local_bh_disable();
        bh_lock_sock(sk);
        WARN_ON(sock_owned_by_user(sk));

        /* Have we already been destroyed by a softirq or backlog? */
        if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
                goto out;

        if (sk->sk_state == DCCP_CLOSED)
                inet_csk_destroy_sock(sk);

        /* Otherwise, socket is reprieved until protocol close. */

out:
        bh_unlock_sock(sk);
        local_bh_enable();
        sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);
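/*
 * Summary of the teardown path above: unread receive data forces an Aborted
 * Reset, a zero-linger close maps onto dccp_disconnect(), and any other
 * established state goes through dccp_terminate_connection(), i.e. the
 * Close/CloseReq handshake of RFC 4340, sec. 8.3, before the socket is
 * orphaned and finally destroyed once it reaches DCCP_CLOSED.
 */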
void dccp_shutdown(struct sock *sk, int how)
{
        dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);

static inline int dccp_mib_init(void)
{
        return snmp_mib_init((void **)dccp_statistics, sizeof(struct dccp_mib));
}

static inline void dccp_mib_exit(void)
{
        snmp_mib_free((void **)dccp_statistics);
}

static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
int dccp_debug;
module_param(dccp_debug, bool, 0644);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif

static int __init dccp_init(void)
{
        unsigned long goal;
        int ehash_order, bhash_order, i;
        int rc = -ENOBUFS;

        BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
                     FIELD_SIZEOF(struct sk_buff, cb));

        dccp_hashinfo.bind_bucket_cachep =
                kmem_cache_create("dccp_bind_bucket",
                                  sizeof(struct inet_bind_bucket), 0,
                                  SLAB_HWCACHE_ALIGN, NULL);
        if (!dccp_hashinfo.bind_bucket_cachep)
                goto out;

        /*
         * Size and allocate the main established and bind bucket
         * hash tables.
         *
         * The methodology is similar to that of the buffer cache.
         */
        if (num_physpages >= (128 * 1024))
                goal = num_physpages >> (21 - PAGE_SHIFT);
        else
                goal = num_physpages >> (23 - PAGE_SHIFT);

        if (thash_entries)
                goal = (thash_entries *
                        sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
        for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
                ;
        do {
                dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
                                           sizeof(struct inet_ehash_bucket);
                while (dccp_hashinfo.ehash_size &
                       (dccp_hashinfo.ehash_size - 1))
                        dccp_hashinfo.ehash_size--;
                dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
                        __get_free_pages(GFP_ATOMIC, ehash_order);
        } while (!dccp_hashinfo.ehash && --ehash_order > 0);

        if (!dccp_hashinfo.ehash) {
                DCCP_CRIT("Failed to allocate DCCP established hash table");
                goto out_free_bind_bucket_cachep;
        }

        for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
                INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
                INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
        }

        if (inet_ehash_locks_alloc(&dccp_hashinfo))
                goto out_free_dccp_ehash;

        bhash_order = ehash_order;

        do {
                dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
                                           sizeof(struct inet_bind_hashbucket);
                if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
                    bhash_order > 0)
                        continue;
                dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
                        __get_free_pages(GFP_ATOMIC, bhash_order);
        } while (!dccp_hashinfo.bhash && --bhash_order >= 0);

        if (!dccp_hashinfo.bhash) {
                DCCP_CRIT("Failed to allocate DCCP bind hash table");
                goto out_free_dccp_locks;
        }

        for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
                spin_lock_init(&dccp_hashinfo.bhash[i].lock);
                INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
        }

        rc = dccp_mib_init();
        if (rc)
                goto out_free_dccp_bhash;

        rc = dccp_ackvec_init();
        if (rc)
                goto out_free_dccp_mib;

        rc = dccp_sysctl_init();
        if (rc)
                goto out_ackvec_exit;

        dccp_timestamping_init();
out:
        return rc;
out_ackvec_exit:
        dccp_ackvec_exit();
out_free_dccp_mib:
        dccp_mib_exit();
out_free_dccp_bhash:
        free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
        dccp_hashinfo.bhash = NULL;
out_free_dccp_locks:
        inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
        free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
        dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        dccp_hashinfo.bind_bucket_cachep = NULL;
        goto out;
}

static void __exit dccp_fini(void)
{
        dccp_mib_exit();
        free_pages((unsigned long)dccp_hashinfo.bhash,
                   get_order(dccp_hashinfo.bhash_size *
                             sizeof(struct inet_bind_hashbucket)));
        free_pages((unsigned long)dccp_hashinfo.ehash,
                   get_order(dccp_hashinfo.ehash_size *
                             sizeof(struct inet_ehash_bucket)));
        inet_ehash_locks_free(&dccp_hashinfo);
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        dccp_ackvec_exit();
        dccp_sysctl_exit();
}

module_init(dccp_init);
module_exit(dccp_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");