1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
24
25 #include <net/inet_sock.h>
26 #include <net/sock.h>
27 #include <net/xfrm.h>
28
29 #include <asm/semaphore.h>
30 #include <linux/spinlock.h>
31 #include <linux/timer.h>
32 #include <linux/delay.h>
33 #include <linux/poll.h>
34
35 #include "ccid.h"
36 #include "dccp.h"
37 #include "feat.h"
38
39 DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
40
41 EXPORT_SYMBOL_GPL(dccp_statistics);
42
43 atomic_t dccp_orphan_count = ATOMIC_INIT(0);
44
45 EXPORT_SYMBOL_GPL(dccp_orphan_count);
46
47 struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
48         .lhash_lock     = RW_LOCK_UNLOCKED,
49         .lhash_users    = ATOMIC_INIT(0),
50         .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
51 };
52
53 EXPORT_SYMBOL_GPL(dccp_hashinfo);
54
55 void dccp_set_state(struct sock *sk, const int state)
56 {
57         const int oldstate = sk->sk_state;
58
59         dccp_pr_debug("%s(%p) %-10.10s -> %s\n",
60                       dccp_role(sk), sk,
61                       dccp_state_name(oldstate), dccp_state_name(state));
62         WARN_ON(state == oldstate);
63
64         switch (state) {
65         case DCCP_OPEN:
66                 if (oldstate != DCCP_OPEN)
67                         DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
68                 break;
69
70         case DCCP_CLOSED:
71                 if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN)
72                         DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
73
74                 sk->sk_prot->unhash(sk);
75                 if (inet_csk(sk)->icsk_bind_hash != NULL &&
76                     !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
77                         inet_put_port(&dccp_hashinfo, sk);
78                 /* fall through */
79         default:
80                 if (oldstate == DCCP_OPEN)
81                         DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
82         }
83
84         /* Change state AFTER socket is unhashed to avoid closed
85          * socket sitting in hash tables.
86          */
87         sk->sk_state = state;
88 }
89
90 EXPORT_SYMBOL_GPL(dccp_set_state);
91
92 void dccp_done(struct sock *sk)
93 {
94         dccp_set_state(sk, DCCP_CLOSED);
95         dccp_clear_xmit_timers(sk);
96
97         sk->sk_shutdown = SHUTDOWN_MASK;
98
99         if (!sock_flag(sk, SOCK_DEAD))
100                 sk->sk_state_change(sk);
101         else
102                 inet_csk_destroy_sock(sk);
103 }
104
105 EXPORT_SYMBOL_GPL(dccp_done);
106
107 const char *dccp_packet_name(const int type)
108 {
109         static const char *dccp_packet_names[] = {
110                 [DCCP_PKT_REQUEST]  = "REQUEST",
111                 [DCCP_PKT_RESPONSE] = "RESPONSE",
112                 [DCCP_PKT_DATA]     = "DATA",
113                 [DCCP_PKT_ACK]      = "ACK",
114                 [DCCP_PKT_DATAACK]  = "DATAACK",
115                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
116                 [DCCP_PKT_CLOSE]    = "CLOSE",
117                 [DCCP_PKT_RESET]    = "RESET",
118                 [DCCP_PKT_SYNC]     = "SYNC",
119                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
120         };
121
122         if (type >= DCCP_NR_PKT_TYPES)
123                 return "INVALID";
124         else
125                 return dccp_packet_names[type];
126 }
127
128 EXPORT_SYMBOL_GPL(dccp_packet_name);
129
130 const char *dccp_state_name(const int state)
131 {
132         static char *dccp_state_names[] = {
133         [DCCP_OPEN]       = "OPEN",
134         [DCCP_REQUESTING] = "REQUESTING",
135         [DCCP_PARTOPEN]   = "PARTOPEN",
136         [DCCP_LISTEN]     = "LISTEN",
137         [DCCP_RESPOND]    = "RESPOND",
138         [DCCP_CLOSING]    = "CLOSING",
139         [DCCP_TIME_WAIT]  = "TIME_WAIT",
140         [DCCP_CLOSED]     = "CLOSED",
141         };
142
143         if (state >= DCCP_MAX_STATES)
144                 return "INVALID STATE!";
145         else
146                 return dccp_state_names[state];
147 }
148
149 EXPORT_SYMBOL_GPL(dccp_state_name);
150
151 void dccp_hash(struct sock *sk)
152 {
153         inet_hash(&dccp_hashinfo, sk);
154 }
155
156 EXPORT_SYMBOL_GPL(dccp_hash);
157
158 void dccp_unhash(struct sock *sk)
159 {
160         inet_unhash(&dccp_hashinfo, sk);
161 }
162
163 EXPORT_SYMBOL_GPL(dccp_unhash);
164
165 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
166 {
167         struct dccp_sock *dp = dccp_sk(sk);
168         struct dccp_minisock *dmsk = dccp_msk(sk);
169         struct inet_connection_sock *icsk = inet_csk(sk);
170
171         dccp_minisock_init(&dp->dccps_minisock);
172         do_gettimeofday(&dp->dccps_epoch);
173
174         /*
175          * FIXME: We're hardcoding the CCID, and doing this at this point makes
176          * the listening (master) sock get CCID control blocks, which is not
177          * necessary, but for now, so as not to mess with the test userspace apps,
178          * let's leave it here; later the real solution is to do this in a
179          * setsockopt(CCIDs-I-want/accept). -acme
180          */
181         if (likely(ctl_sock_initialized)) {
182                 int rc = dccp_feat_init(dmsk);
183
184                 if (rc)
185                         return rc;
186
187                 if (dmsk->dccpms_send_ack_vector) {
188                         dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
189                         if (dp->dccps_hc_rx_ackvec == NULL)
190                                 return -ENOMEM;
191                 }
192                 dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
193                                                       sk, GFP_KERNEL);
194                 dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
195                                                       sk, GFP_KERNEL);
196                 if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
197                              dp->dccps_hc_tx_ccid == NULL)) {
198                         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
199                         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
200                         if (dmsk->dccpms_send_ack_vector) {
201                                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
202                                 dp->dccps_hc_rx_ackvec = NULL;
203                         }
204                         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
205                         return -ENOMEM;
206                 }
207         } else {
208                 /* control socket doesn't need feat nego */
209                 INIT_LIST_HEAD(&dmsk->dccpms_pending);
210                 INIT_LIST_HEAD(&dmsk->dccpms_conf);
211         }
212
213         dccp_init_xmit_timers(sk);
214         icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
215         sk->sk_state            = DCCP_CLOSED;
216         sk->sk_write_space      = dccp_write_space;
217         icsk->icsk_sync_mss     = dccp_sync_mss;
218         dp->dccps_mss_cache     = 536;
219         dp->dccps_role          = DCCP_ROLE_UNDEFINED;
220         dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
221         dp->dccps_l_ack_ratio   = dp->dccps_r_ack_ratio = 1;
222
223         return 0;
224 }
225
226 EXPORT_SYMBOL_GPL(dccp_init_sock);
227
228 int dccp_destroy_sock(struct sock *sk)
229 {
230         struct dccp_sock *dp = dccp_sk(sk);
231         struct dccp_minisock *dmsk = dccp_msk(sk);
232
233         /*
234          * DCCP doesn't use sk_write_queue, just sk_send_head
235          * for retransmissions
236          */
237         if (sk->sk_send_head != NULL) {
238                 kfree_skb(sk->sk_send_head);
239                 sk->sk_send_head = NULL;
240         }
241
242         /* Clean up a referenced DCCP bind bucket. */
243         if (inet_csk(sk)->icsk_bind_hash != NULL)
244                 inet_put_port(&dccp_hashinfo, sk);
245
246         kfree(dp->dccps_service_list);
247         dp->dccps_service_list = NULL;
248
249         if (dmsk->dccpms_send_ack_vector) {
250                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
251                 dp->dccps_hc_rx_ackvec = NULL;
252         }
253         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
254         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
255         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
256
257         /* clean up feature negotiation state */
258         dccp_feat_clean(dmsk);
259
260         return 0;
261 }
262
263 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
264
265 static inline int dccp_listen_start(struct sock *sk, int backlog)
266 {
267         struct dccp_sock *dp = dccp_sk(sk);
268
269         dp->dccps_role = DCCP_ROLE_LISTEN;
270         return inet_csk_listen_start(sk, backlog);
271 }
272
273 int dccp_disconnect(struct sock *sk, int flags)
274 {
275         struct inet_connection_sock *icsk = inet_csk(sk);
276         struct inet_sock *inet = inet_sk(sk);
277         int err = 0;
278         const int old_state = sk->sk_state;
279
280         if (old_state != DCCP_CLOSED)
281                 dccp_set_state(sk, DCCP_CLOSED);
282
283         /* ABORT function of RFC793 */
284         if (old_state == DCCP_LISTEN) {
285                 inet_csk_listen_stop(sk);
286         /* FIXME: do the active reset thing */
287         } else if (old_state == DCCP_REQUESTING)
288                 sk->sk_err = ECONNRESET;
289
290         dccp_clear_xmit_timers(sk);
291         __skb_queue_purge(&sk->sk_receive_queue);
292         if (sk->sk_send_head != NULL) {
293                 __kfree_skb(sk->sk_send_head);
294                 sk->sk_send_head = NULL;
295         }
296
297         inet->dport = 0;
298
299         if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
300                 inet_reset_saddr(sk);
301
302         sk->sk_shutdown = 0;
303         sock_reset_flag(sk, SOCK_DONE);
304
305         icsk->icsk_backoff = 0;
306         inet_csk_delack_init(sk);
307         __sk_dst_reset(sk);
308
309         BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
310
311         sk->sk_error_report(sk);
312         return err;
313 }
314
315 EXPORT_SYMBOL_GPL(dccp_disconnect);
316
317 /*
318  *      Wait for a DCCP event.
319  *
320  *      Note that we don't need to lock the socket, as the upper poll layers
321  *      take care of normal races (between the test and the event) and we don't
322  *      go look at any of the socket buffers directly.
323  */
324 unsigned int dccp_poll(struct file *file, struct socket *sock,
325                        poll_table *wait)
326 {
327         unsigned int mask;
328         struct sock *sk = sock->sk;
329
330         poll_wait(file, sk->sk_sleep, wait);
331         if (sk->sk_state == DCCP_LISTEN)
332                 return inet_csk_listen_poll(sk);
333
334         /* Socket is not locked. We are protected from async events
335            by poll logic; correct handling of state changes made by
336            other threads is impossible in any case.
337          */
338
339         mask = 0;
340         if (sk->sk_err)
341                 mask = POLLERR;
342
343         if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
344                 mask |= POLLHUP;
345         if (sk->sk_shutdown & RCV_SHUTDOWN)
346                 mask |= POLLIN | POLLRDNORM | POLLRDHUP;
347
348         /* Connected? */
349         if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
350                 if (atomic_read(&sk->sk_rmem_alloc) > 0)
351                         mask |= POLLIN | POLLRDNORM;
352
353                 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
354                         if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
355                                 mask |= POLLOUT | POLLWRNORM;
356                         } else {  /* send SIGIO later */
357                                 set_bit(SOCK_ASYNC_NOSPACE,
358                                         &sk->sk_socket->flags);
359                                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
360
361                                 /* Race breaker. If space is freed after
362                                  * wspace test but before the flags are set,
363                                  * IO signal will be lost.
364                                  */
365                                 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
366                                         mask |= POLLOUT | POLLWRNORM;
367                         }
368                 }
369         }
370         return mask;
371 }
372
373 EXPORT_SYMBOL_GPL(dccp_poll);
374
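/*
 * Illustrative sketch, not part of this file: how an application might wait
 * for events on a connected DCCP socket.  'fd', 'buf' and the one-second
 * timeout are assumptions for the example; the descriptor would come from
 * socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP) followed by connect().
 * dccp_poll() above reports POLLIN/POLLRDNORM once receive data is queued,
 * and POLLOUT/POLLWRNORM once write space is available.
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLOUT };
 *
 *	if (poll(&pfd, 1, 1000) > 0 && (pfd.revents & POLLIN))
 *		recv(fd, buf, sizeof(buf), 0);
 */
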
375 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
376 {
377         dccp_pr_debug("entry\n");
378         return -ENOIOCTLCMD;
379 }
380
381 EXPORT_SYMBOL_GPL(dccp_ioctl);
382
383 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
384                                    char __user *optval, int optlen)
385 {
386         struct dccp_sock *dp = dccp_sk(sk);
387         struct dccp_service_list *sl = NULL;
388
389         if (service == DCCP_SERVICE_INVALID_VALUE || 
390             optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
391                 return -EINVAL;
392
393         if (optlen > sizeof(service)) {
394                 sl = kmalloc(optlen, GFP_KERNEL);
395                 if (sl == NULL)
396                         return -ENOMEM;
397
398                 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
399                 if (copy_from_user(sl->dccpsl_list,
400                                    optval + sizeof(service),
401                                    optlen - sizeof(service)) ||
402                     dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
403                         kfree(sl);
404                         return -EFAULT;
405                 }
406         }
407
408         lock_sock(sk);
409         dp->dccps_service = service;
410
411         kfree(dp->dccps_service_list);
412
413         dp->dccps_service_list = sl;
414         release_sock(sk);
415         return 0;
416 }
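
/*
 * Illustrative sketch, not part of this file: setting the service code(s)
 * from userspace before connect() or listen().  The first 32-bit value
 * becomes dccps_service; any further values form the additional service
 * list kept in dccps_service_list.  The descriptor 'fd' and the service
 * numbers 42/43 are assumptions for the example.
 *
 *	uint32_t codes[2] = { htonl(42), htonl(43) };
 *
 *	if (setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
 *		       codes, sizeof(codes)) < 0)
 *		perror("DCCP_SOCKOPT_SERVICE");
 */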
417
418 /* Byte 1 is the feature; the rest is the preference list */
419 static int dccp_setsockopt_change(struct sock *sk, int type,
420                                   struct dccp_so_feat __user *optval)
421 {
422         struct dccp_so_feat opt;
423         u8 *val;
424         int rc;
425
426         if (copy_from_user(&opt, optval, sizeof(opt)))
427                 return -EFAULT;
428
429         val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
430         if (!val)
431                 return -ENOMEM;
432
433         if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
434                 rc = -EFAULT;
435                 goto out_free_val;
436         }
437
438         rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
439                               val, opt.dccpsf_len, GFP_KERNEL);
440         if (rc)
441                 goto out_free_val;
442
443 out:
444         return rc;
445
446 out_free_val:
447         kfree(val);
448         goto out;
449 }
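
/*
 * Illustrative sketch, not part of this file: requesting a local CCID
 * preference via DCCP_SOCKOPT_CHANGE_L.  DCCPF_CCID and struct dccp_so_feat
 * come from <linux/dccp.h>; the descriptor 'fd' and the preference order
 * "CCID 3 before CCID 2" are assumptions for the example.
 *
 *	unsigned char ccids[] = { 3, 2 };
 *	struct dccp_so_feat feat = {
 *		.dccpsf_feat = DCCPF_CCID,
 *		.dccpsf_val  = ccids,
 *		.dccpsf_len  = sizeof(ccids),
 *	};
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_CHANGE_L, &feat, sizeof(feat));
 */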
450
451 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
452                 char __user *optval, int optlen)
453 {
454         struct dccp_sock *dp;
455         int err;
456         int val;
457
458         if (optlen < sizeof(int))
459                 return -EINVAL;
460
461         if (get_user(val, (int __user *)optval))
462                 return -EFAULT;
463
464         if (optname == DCCP_SOCKOPT_SERVICE)
465                 return dccp_setsockopt_service(sk, val, optval, optlen);
466
467         lock_sock(sk);
468         dp = dccp_sk(sk);
469         err = 0;
470
471         switch (optname) {
472         case DCCP_SOCKOPT_PACKET_SIZE:
473                 dp->dccps_packet_size = val;
474                 break;
475         case DCCP_SOCKOPT_CHANGE_L:
476                 if (optlen != sizeof(struct dccp_so_feat))
477                         err = -EINVAL;
478                 else
479                         err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
480                                                      (struct dccp_so_feat __user *)
481                                                      optval);
482                 break;
483         case DCCP_SOCKOPT_CHANGE_R:
484                 if (optlen != sizeof(struct dccp_so_feat))
485                         err = -EINVAL;
486                 else
487                         err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
488                                                      (struct dccp_so_feat __user *)
489                                                      optval);
490                 break;
491         case DCCP_SOCKOPT_SEND_CSCOV:   /* sender side, RFC 4340, sec. 9.2 */
492                 if (val < 0 || val > 15)
493                         err = -EINVAL;
494                 else
495                         dp->dccps_pcslen = val;
496                 break;
497         case DCCP_SOCKOPT_RECV_CSCOV:   /* receiver side, RFC 4340 sec. 9.2.1 */
498                 if (val < 0 || val > 15)
499                         err = -EINVAL;
500                 else {
501                         dp->dccps_pcrlen = val;
502                         /* FIXME: add feature negotiation,
503                          * ChangeL(MinimumChecksumCoverage, val) */
504                 }
505                 break;
506         default:
507                 err = -ENOPROTOOPT;
508                 break;
509         }
510
511         release_sock(sk);
512         return err;
513 }
514
515 int dccp_setsockopt(struct sock *sk, int level, int optname,
516                     char __user *optval, int optlen)
517 {
518         if (level != SOL_DCCP)
519                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
520                                                              optname, optval,
521                                                              optlen);
522         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
523 }
524
525 EXPORT_SYMBOL_GPL(dccp_setsockopt);
526
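/*
 * Illustrative sketch, not part of this file: enabling partial checksums
 * (RFC 4340, sec. 9.2) from userspace.  A sender coverage of 1 asks that
 * only the DCCP header be covered by the checksum, leaving payload
 * corruption for the application to tolerate; the receiver-side option
 * states what coverage will be accepted.  'fd' is an assumption for the
 * example.
 *
 *	int cscov = 1;
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SEND_CSCOV,
 *		   &cscov, sizeof(cscov));
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_RECV_CSCOV,
 *		   &cscov, sizeof(cscov));
 */
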
527 #ifdef CONFIG_COMPAT
528 int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
529                            char __user *optval, int optlen)
530 {
531         if (level != SOL_DCCP)
532                 return inet_csk_compat_setsockopt(sk, level, optname,
533                                                   optval, optlen);
534         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
535 }
536
537 EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
538 #endif
539
540 static int dccp_getsockopt_service(struct sock *sk, int len,
541                                    __be32 __user *optval,
542                                    int __user *optlen)
543 {
544         const struct dccp_sock *dp = dccp_sk(sk);
545         const struct dccp_service_list *sl;
546         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
547
548         lock_sock(sk);
549         if ((sl = dp->dccps_service_list) != NULL) {
550                 slen = sl->dccpsl_nr * sizeof(u32);
551                 total_len += slen;
552         }
553
554         err = -EINVAL;
555         if (total_len > len)
556                 goto out;
557
558         err = 0;
559         if (put_user(total_len, optlen) ||
560             put_user(dp->dccps_service, optval) ||
561             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
562                 err = -EFAULT;
563 out:
564         release_sock(sk);
565         return err;
566 }
567
568 static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
569                     char __user *optval, int __user *optlen)
570 {
571         struct dccp_sock *dp;
572         int val, len;
573
574         if (get_user(len, optlen))
575                 return -EFAULT;
576
577         if (len < sizeof(int))
578                 return -EINVAL;
579
580         dp = dccp_sk(sk);
581
582         switch (optname) {
583         case DCCP_SOCKOPT_PACKET_SIZE:
584                 val = dp->dccps_packet_size;
585                 len = sizeof(dp->dccps_packet_size);
586                 break;
587         case DCCP_SOCKOPT_SERVICE:
588                 return dccp_getsockopt_service(sk, len,
589                                                (__be32 __user *)optval, optlen);
590         case DCCP_SOCKOPT_SEND_CSCOV:
591                 val = dp->dccps_pcslen;
592                 break;
593         case DCCP_SOCKOPT_RECV_CSCOV:
594                 val = dp->dccps_pcrlen;
595                 break;
596         case 128 ... 191:
597                 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
598                                              len, (u32 __user *)optval, optlen);
599         case 192 ... 255:
600                 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
601                                              len, (u32 __user *)optval, optlen);
602         default:
603                 return -ENOPROTOOPT;
604         }
605
606         if (put_user(len, optlen) || copy_to_user(optval, &val, len))
607                 return -EFAULT;
608
609         return 0;
610 }
611
612 int dccp_getsockopt(struct sock *sk, int level, int optname,
613                     char __user *optval, int __user *optlen)
614 {
615         if (level != SOL_DCCP)
616                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
617                                                              optname, optval,
618                                                              optlen);
619         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
620 }
621
622 EXPORT_SYMBOL_GPL(dccp_getsockopt);
623
624 #ifdef CONFIG_COMPAT
625 int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
626                            char __user *optval, int __user *optlen)
627 {
628         if (level != SOL_DCCP)
629                 return inet_csk_compat_getsockopt(sk, level, optname,
630                                                   optval, optlen);
631         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
632 }
633
634 EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
635 #endif
636
637 int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
638                  size_t len)
639 {
640         const struct dccp_sock *dp = dccp_sk(sk);
641         const int flags = msg->msg_flags;
642         const int noblock = flags & MSG_DONTWAIT;
643         struct sk_buff *skb;
644         int rc, size;
645         long timeo;
646
647         if (len > dp->dccps_mss_cache)
648                 return -EMSGSIZE;
649
650         lock_sock(sk);
651         timeo = sock_sndtimeo(sk, noblock);
652
653         /*
654          * We have to use sk_stream_wait_connect here to set sk_write_pending,
655          * so that the trick in dccp_rcv_request_sent_state_process works.
656          */
657         /* Wait for a connection to finish. */
658         if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
659                 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
660                         goto out_release;
661
662         size = sk->sk_prot->max_header + len;
663         release_sock(sk);
664         skb = sock_alloc_send_skb(sk, size, noblock, &rc);
665         lock_sock(sk);
666         if (skb == NULL)
667                 goto out_release;
668
669         skb_reserve(skb, sk->sk_prot->max_header);
670         rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
671         if (rc != 0)
672                 goto out_discard;
673
674         skb_queue_tail(&sk->sk_write_queue, skb);
675         dccp_write_xmit(sk, 0);
676 out_release:
677         release_sock(sk);
678         return rc ? : len;
679 out_discard:
680         kfree_skb(skb);
681         goto out_release;
682 }
683
684 EXPORT_SYMBOL_GPL(dccp_sendmsg);
685
686 int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
687                  size_t len, int nonblock, int flags, int *addr_len)
688 {
689         const struct dccp_hdr *dh;
690         long timeo;
691
692         lock_sock(sk);
693
694         if (sk->sk_state == DCCP_LISTEN) {
695                 len = -ENOTCONN;
696                 goto out;
697         }
698
699         timeo = sock_rcvtimeo(sk, nonblock);
700
701         do {
702                 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
703
704                 if (skb == NULL)
705                         goto verify_sock_status;
706
707                 dh = dccp_hdr(skb);
708
709                 if (dh->dccph_type == DCCP_PKT_DATA ||
710                     dh->dccph_type == DCCP_PKT_DATAACK)
711                         goto found_ok_skb;
712
713                 if (dh->dccph_type == DCCP_PKT_RESET ||
714                     dh->dccph_type == DCCP_PKT_CLOSE) {
715                         dccp_pr_debug("found fin ok!\n");
716                         len = 0;
717                         goto found_fin_ok;
718                 }
719                 dccp_pr_debug("packet_type=%s\n",
720                               dccp_packet_name(dh->dccph_type));
721                 sk_eat_skb(sk, skb, 0);
722 verify_sock_status:
723                 if (sock_flag(sk, SOCK_DONE)) {
724                         len = 0;
725                         break;
726                 }
727
728                 if (sk->sk_err) {
729                         len = sock_error(sk);
730                         break;
731                 }
732
733                 if (sk->sk_shutdown & RCV_SHUTDOWN) {
734                         len = 0;
735                         break;
736                 }
737
738                 if (sk->sk_state == DCCP_CLOSED) {
739                         if (!sock_flag(sk, SOCK_DONE)) {
740                                 /* This occurs when the user tries to read
741                                  * from a never-connected socket.
742                                  */
743                                 len = -ENOTCONN;
744                                 break;
745                         }
746                         len = 0;
747                         break;
748                 }
749
750                 if (!timeo) {
751                         len = -EAGAIN;
752                         break;
753                 }
754
755                 if (signal_pending(current)) {
756                         len = sock_intr_errno(timeo);
757                         break;
758                 }
759
760                 sk_wait_data(sk, &timeo);
761                 continue;
762         found_ok_skb:
763                 if (len > skb->len)
764                         len = skb->len;
765                 else if (len < skb->len)
766                         msg->msg_flags |= MSG_TRUNC;
767
768                 if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
769                         /* Exception. Bailout! */
770                         len = -EFAULT;
771                         break;
772                 }
773         found_fin_ok:
774                 if (!(flags & MSG_PEEK))
775                         sk_eat_skb(sk, skb, 0);
776                 break;
777         } while (1);
778 out:
779         release_sock(sk);
780         return len;
781 }
782
783 EXPORT_SYMBOL_GPL(dccp_recvmsg);
784
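/*
 * Illustrative sketch, not part of this file: DCCP is datagram oriented, so
 * each send()/recv() call moves one application datagram.  dccp_sendmsg()
 * above rejects anything larger than dccps_mss_cache with -EMSGSIZE, and
 * dccp_recvmsg() sets MSG_TRUNC when the caller's buffer is smaller than the
 * queued datagram.  'fd' and 'buf' are assumptions for the example.
 *
 *	char buf[256];
 *
 *	if (send(fd, "hello", 5, 0) < 0)
 *		perror("send");
 *	if (recv(fd, buf, sizeof(buf), 0) < 0)
 *		perror("recv");
 */
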
785 int inet_dccp_listen(struct socket *sock, int backlog)
786 {
787         struct sock *sk = sock->sk;
788         unsigned char old_state;
789         int err;
790
791         lock_sock(sk);
792
793         err = -EINVAL;
794         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
795                 goto out;
796
797         old_state = sk->sk_state;
798         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
799                 goto out;
800
801         /* Really, if the socket is already in listen state
802          * we can only allow the backlog to be adjusted.
803          */
804         if (old_state != DCCP_LISTEN) {
805                 /*
806                  * FIXME: here it probably should be sk->sk_prot->listen_start
807                  * see tcp_listen_start
808                  */
809                 err = dccp_listen_start(sk, backlog);
810                 if (err)
811                         goto out;
812         }
813         sk->sk_max_ack_backlog = backlog;
814         err = 0;
815
816 out:
817         release_sock(sk);
818         return err;
819 }
820
821 EXPORT_SYMBOL_GPL(inet_dccp_listen);
822
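/*
 * Illustrative sketch, not part of this file: the passive-open path as seen
 * from userspace.  'addr' (an already filled-in sockaddr_in) and the service
 * code 42 are assumptions for the example; the service code set here is what
 * incoming REQUEST packets are matched against.
 *
 *	int fd = socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP);
 *	uint32_t service = htonl(42);
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE, &service, sizeof(service));
 *	bind(fd, (struct sockaddr *)&addr, sizeof(addr));
 *	listen(fd, 5);
 *	accept(fd, NULL, NULL);
 */
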
823 static const unsigned char dccp_new_state[] = {
824         /* current state:   new state:      action:     */
825         [0]               = DCCP_CLOSED,
826         [DCCP_OPEN]       = DCCP_CLOSING | DCCP_ACTION_FIN,
827         [DCCP_REQUESTING] = DCCP_CLOSED,
828         [DCCP_PARTOPEN]   = DCCP_CLOSING | DCCP_ACTION_FIN,
829         [DCCP_LISTEN]     = DCCP_CLOSED,
830         [DCCP_RESPOND]    = DCCP_CLOSED,
831         [DCCP_CLOSING]    = DCCP_CLOSED,
832         [DCCP_TIME_WAIT]  = DCCP_CLOSED,
833         [DCCP_CLOSED]     = DCCP_CLOSED,
834 };
835
836 static int dccp_close_state(struct sock *sk)
837 {
838         const int next = dccp_new_state[sk->sk_state];
839         const int ns = next & DCCP_STATE_MASK;
840
841         if (ns != sk->sk_state)
842                 dccp_set_state(sk, ns);
843
844         return next & DCCP_ACTION_FIN;
845 }
846
847 void dccp_close(struct sock *sk, long timeout)
848 {
849         struct dccp_sock *dp = dccp_sk(sk);
850         struct sk_buff *skb;
851         int state;
852
853         lock_sock(sk);
854
855         sk->sk_shutdown = SHUTDOWN_MASK;
856
857         if (sk->sk_state == DCCP_LISTEN) {
858                 dccp_set_state(sk, DCCP_CLOSED);
859
860                 /* Special case. */
861                 inet_csk_listen_stop(sk);
862
863                 goto adjudge_to_death;
864         }
865
866         sk_stop_timer(sk, &dp->dccps_xmit_timer);
867
868         /*
869          * We need to flush the recv. buffs.  We do this only on the
870          * descriptor close, not protocol-sourced closes, because the
871          * reader process may not have drained the data yet!
872          */
873         /* FIXME: check for unread data */
874         while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
875                 __kfree_skb(skb);
876         }
877
878         if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
879                 /* Check zero linger _after_ checking for unread data. */
880                 sk->sk_prot->disconnect(sk, 0);
881         } else if (dccp_close_state(sk)) {
882                 dccp_send_close(sk, 1);
883         }
884
885         sk_stream_wait_close(sk, timeout);
886
887 adjudge_to_death:
888         state = sk->sk_state;
889         sock_hold(sk);
890         sock_orphan(sk);
891         atomic_inc(sk->sk_prot->orphan_count);
892
893         /*
894          * This is the last release_sock in this socket's life; it will process the backlog.
895          */
896         release_sock(sk);
897         /*
898          * Now socket is owned by kernel and we acquire BH lock
899          * to finish close. No need to check for user refs.
900          */
901         local_bh_disable();
902         bh_lock_sock(sk);
903         BUG_TRAP(!sock_owned_by_user(sk));
904
905         /* Have we already been destroyed by a softirq or backlog? */
906         if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
907                 goto out;
908
909         /*
910          * The last release_sock may have processed the CLOSE or RESET
911          * packet moving sock to CLOSED state, if not we have to fire
912          * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
913          * in draft-ietf-dccp-spec-11. -acme
914          */
915         if (sk->sk_state == DCCP_CLOSING) {
916                 /* FIXME: should start at 2 * RTT */
917                 /* Timer for repeating the CLOSE/CLOSEREQ until an answer arrives. */
918                 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
919                                           inet_csk(sk)->icsk_rto,
920                                           DCCP_RTO_MAX);
921 #if 0
922                 /* Yeah, we should use sk->sk_prot->orphan_count, etc */
923                 dccp_set_state(sk, DCCP_CLOSED);
924 #endif
925         }
926
927         if (sk->sk_state == DCCP_CLOSED)
928                 inet_csk_destroy_sock(sk);
929
930         /* Otherwise, socket is reprieved until protocol close. */
931
932 out:
933         bh_unlock_sock(sk);
934         local_bh_enable();
935         sock_put(sk);
936 }
937
938 EXPORT_SYMBOL_GPL(dccp_close);
939
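/*
 * Illustrative sketch, not part of this file: a zero SO_LINGER timeout makes
 * close() take the abortive branch in dccp_close() above (sk_prot->disconnect)
 * instead of the CLOSE/CLOSEREQ handshake.  'fd' is an assumption for the
 * example.
 *
 *	struct linger lg = { .l_onoff = 1, .l_linger = 0 };
 *
 *	setsockopt(fd, SOL_SOCKET, SO_LINGER, &lg, sizeof(lg));
 *	close(fd);
 */
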
940 void dccp_shutdown(struct sock *sk, int how)
941 {
942         dccp_pr_debug("entry\n");
943 }
944
945 EXPORT_SYMBOL_GPL(dccp_shutdown);
946
947 static int __init dccp_mib_init(void)
948 {
949         int rc = -ENOMEM;
950
951         dccp_statistics[0] = alloc_percpu(struct dccp_mib);
952         if (dccp_statistics[0] == NULL)
953                 goto out;
954
955         dccp_statistics[1] = alloc_percpu(struct dccp_mib);
956         if (dccp_statistics[1] == NULL)
957                 goto out_free_one;
958
959         rc = 0;
960 out:
961         return rc;
962 out_free_one:
963         free_percpu(dccp_statistics[0]);
964         dccp_statistics[0] = NULL;
965         goto out;
966
967 }
968
969 static void dccp_mib_exit(void)
970 {
971         free_percpu(dccp_statistics[0]);
972         free_percpu(dccp_statistics[1]);
973         dccp_statistics[0] = dccp_statistics[1] = NULL;
974 }
975
976 static int thash_entries;
977 module_param(thash_entries, int, 0444);
978 MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
979
980 #ifdef CONFIG_IP_DCCP_DEBUG
981 int dccp_debug;
982 module_param(dccp_debug, int, 0444);
983 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
984
985 EXPORT_SYMBOL_GPL(dccp_debug);
986 #endif
987
988 static int __init dccp_init(void)
989 {
990         unsigned long goal;
991         int ehash_order, bhash_order, i;
992         int rc = -ENOBUFS;
993
994         dccp_hashinfo.bind_bucket_cachep =
995                 kmem_cache_create("dccp_bind_bucket",
996                                   sizeof(struct inet_bind_bucket), 0,
997                                   SLAB_HWCACHE_ALIGN, NULL, NULL);
998         if (!dccp_hashinfo.bind_bucket_cachep)
999                 goto out;
1000
1001         /*
1002          * Size and allocate the main established and bind bucket
1003          * hash tables.
1004          *
1005          * The methodology is similar to that of the buffer cache.
1006          */
1007         if (num_physpages >= (128 * 1024))
1008                 goal = num_physpages >> (21 - PAGE_SHIFT);
1009         else
1010                 goal = num_physpages >> (23 - PAGE_SHIFT);
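	/*
	 * Worked example (illustrative, assuming PAGE_SHIFT == 12, i.e. 4 KiB
	 * pages): with 131072 pages (512 MiB of RAM) the first branch above
	 * applies, so goal = 131072 >> (21 - 12) = 256 pages of ehash buckets.
	 */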
1011
1012         if (thash_entries)
1013                 goal = (thash_entries *
1014                         sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1015         for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1016                 ;
1017         do {
1018                 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
1019                                         sizeof(struct inet_ehash_bucket);
1020                 dccp_hashinfo.ehash_size >>= 1;
1021                 while (dccp_hashinfo.ehash_size &
1022                        (dccp_hashinfo.ehash_size - 1))
1023                         dccp_hashinfo.ehash_size--;
1024                 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1025                         __get_free_pages(GFP_ATOMIC, ehash_order);
1026         } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1027
1028         if (!dccp_hashinfo.ehash) {
1029                 printk(KERN_CRIT "Failed to allocate DCCP "
1030                                  "established hash table\n");
1031                 goto out_free_bind_bucket_cachep;
1032         }
1033
1034         for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
1035                 rwlock_init(&dccp_hashinfo.ehash[i].lock);
1036                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
1037         }
1038
1039         bhash_order = ehash_order;
1040
1041         do {
1042                 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1043                                         sizeof(struct inet_bind_hashbucket);
1044                 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1045                     bhash_order > 0)
1046                         continue;
1047                 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1048                         __get_free_pages(GFP_ATOMIC, bhash_order);
1049         } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1050
1051         if (!dccp_hashinfo.bhash) {
1052                 printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
1053                 goto out_free_dccp_ehash;
1054         }
1055
1056         for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1057                 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1058                 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1059         }
1060
1061         rc = dccp_mib_init();
1062         if (rc)
1063                 goto out_free_dccp_bhash;
1064
1065         rc = dccp_ackvec_init();
1066         if (rc)
1067                 goto out_free_dccp_mib;
1068
1069         rc = dccp_sysctl_init();
1070         if (rc)
1071                 goto out_ackvec_exit;
1072 out:
1073         return rc;
1074 out_ackvec_exit:
1075         dccp_ackvec_exit();
1076 out_free_dccp_mib:
1077         dccp_mib_exit();
1078 out_free_dccp_bhash:
1079         free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1080         dccp_hashinfo.bhash = NULL;
1081 out_free_dccp_ehash:
1082         free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1083         dccp_hashinfo.ehash = NULL;
1084 out_free_bind_bucket_cachep:
1085         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1086         dccp_hashinfo.bind_bucket_cachep = NULL;
1087         goto out;
1088 }
1089
1090 static void __exit dccp_fini(void)
1091 {
1092         dccp_mib_exit();
1093         free_pages((unsigned long)dccp_hashinfo.bhash,
1094                    get_order(dccp_hashinfo.bhash_size *
1095                              sizeof(struct inet_bind_hashbucket)));
1096         free_pages((unsigned long)dccp_hashinfo.ehash,
1097                    get_order(dccp_hashinfo.ehash_size *
1098                              sizeof(struct inet_ehash_bucket)));
1099         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1100         dccp_ackvec_exit();
1101         dccp_sysctl_exit();
1102 }
1103
1104 module_init(dccp_init);
1105 module_exit(dccp_fini);
1106
1107 MODULE_LICENSE("GPL");
1108 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1109 MODULE_DESCRIPTION("DCCP - Datagram Congestion Control Protocol");