bbs.cooldavid.org Git - net-next-2.6.git/blob - net/dccp/proto.c
Merge branch 'upstream-linus' of master.kernel.org:/pub/scm/linux/kernel/git/jgarzik...
[net-next-2.6.git] / net / dccp / proto.c
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
24
25 #include <net/inet_sock.h>
26 #include <net/sock.h>
27 #include <net/xfrm.h>
28
29 #include <asm/semaphore.h>
30 #include <linux/spinlock.h>
31 #include <linux/timer.h>
32 #include <linux/delay.h>
33 #include <linux/poll.h>
34
35 #include "ccid.h"
36 #include "dccp.h"
37 #include "feat.h"
38
39 DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
40
41 EXPORT_SYMBOL_GPL(dccp_statistics);
42
43 atomic_t dccp_orphan_count = ATOMIC_INIT(0);
44
45 EXPORT_SYMBOL_GPL(dccp_orphan_count);
46
47 struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
48         .lhash_lock     = RW_LOCK_UNLOCKED,
49         .lhash_users    = ATOMIC_INIT(0),
50         .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
51 };
52
53 EXPORT_SYMBOL_GPL(dccp_hashinfo);
54
55 void dccp_set_state(struct sock *sk, const int state)
56 {
57         const int oldstate = sk->sk_state;
58
59         dccp_pr_debug("%s(%p) %-10.10s -> %s\n",
60                       dccp_role(sk), sk,
61                       dccp_state_name(oldstate), dccp_state_name(state));
62         WARN_ON(state == oldstate);
63
64         switch (state) {
65         case DCCP_OPEN:
66                 if (oldstate != DCCP_OPEN)
67                         DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
68                 break;
69
70         case DCCP_CLOSED:
71                 if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN)
72                         DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
73
74                 sk->sk_prot->unhash(sk);
75                 if (inet_csk(sk)->icsk_bind_hash != NULL &&
76                     !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
77                         inet_put_port(&dccp_hashinfo, sk);
78                 /* fall through */
79         default:
80                 if (oldstate == DCCP_OPEN)
81                         DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
82         }
83
84         /* Change state AFTER socket is unhashed to avoid closed
85          * socket sitting in hash tables.
86          */
87         sk->sk_state = state;
88 }
89
90 EXPORT_SYMBOL_GPL(dccp_set_state);
91
92 void dccp_done(struct sock *sk)
93 {
94         dccp_set_state(sk, DCCP_CLOSED);
95         dccp_clear_xmit_timers(sk);
96
97         sk->sk_shutdown = SHUTDOWN_MASK;
98
99         if (!sock_flag(sk, SOCK_DEAD))
100                 sk->sk_state_change(sk);
101         else
102                 inet_csk_destroy_sock(sk);
103 }
104
105 EXPORT_SYMBOL_GPL(dccp_done);
106
107 const char *dccp_packet_name(const int type)
108 {
109         static const char *dccp_packet_names[] = {
110                 [DCCP_PKT_REQUEST]  = "REQUEST",
111                 [DCCP_PKT_RESPONSE] = "RESPONSE",
112                 [DCCP_PKT_DATA]     = "DATA",
113                 [DCCP_PKT_ACK]      = "ACK",
114                 [DCCP_PKT_DATAACK]  = "DATAACK",
115                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
116                 [DCCP_PKT_CLOSE]    = "CLOSE",
117                 [DCCP_PKT_RESET]    = "RESET",
118                 [DCCP_PKT_SYNC]     = "SYNC",
119                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
120         };
121
122         if (type >= DCCP_NR_PKT_TYPES)
123                 return "INVALID";
124         else
125                 return dccp_packet_names[type];
126 }
127
128 EXPORT_SYMBOL_GPL(dccp_packet_name);
129
130 const char *dccp_state_name(const int state)
131 {
132         static char *dccp_state_names[] = {
133         [DCCP_OPEN]       = "OPEN",
134         [DCCP_REQUESTING] = "REQUESTING",
135         [DCCP_PARTOPEN]   = "PARTOPEN",
136         [DCCP_LISTEN]     = "LISTEN",
137         [DCCP_RESPOND]    = "RESPOND",
138         [DCCP_CLOSING]    = "CLOSING",
139         [DCCP_TIME_WAIT]  = "TIME_WAIT",
140         [DCCP_CLOSED]     = "CLOSED",
141         };
142
143         if (state >= DCCP_MAX_STATES)
144                 return "INVALID STATE!";
145         else
146                 return dccp_state_names[state];
147 }
148
149 EXPORT_SYMBOL_GPL(dccp_state_name);
150
151 void dccp_hash(struct sock *sk)
152 {
153         inet_hash(&dccp_hashinfo, sk);
154 }
155
156 EXPORT_SYMBOL_GPL(dccp_hash);
157
158 void dccp_unhash(struct sock *sk)
159 {
160         inet_unhash(&dccp_hashinfo, sk);
161 }
162
163 EXPORT_SYMBOL_GPL(dccp_unhash);
164
165 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
166 {
167         struct dccp_sock *dp = dccp_sk(sk);
168         struct dccp_minisock *dmsk = dccp_msk(sk);
169         struct inet_connection_sock *icsk = inet_csk(sk);
170
171         dccp_minisock_init(&dp->dccps_minisock);
172         do_gettimeofday(&dp->dccps_epoch);
173
174         /*
175          * FIXME: We're hardcoding the CCID, and doing this at this point makes
176          * the listening (master) sock get CCID control blocks, which is not
177          * necessary, but for now, to not mess with the test userspace apps,
178          * lets leave it here, later the real solution is to do this in a
179          * setsockopt(CCIDs-I-want/accept). -acme
180          */
181         if (likely(ctl_sock_initialized)) {
182                 int rc = dccp_feat_init(dmsk);
183
184                 if (rc)
185                         return rc;
186
187                 if (dmsk->dccpms_send_ack_vector) {
188                         dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
189                         if (dp->dccps_hc_rx_ackvec == NULL)
190                                 return -ENOMEM;
191                 }
192                 dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
193                                                       sk, GFP_KERNEL);
194                 dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
195                                                       sk, GFP_KERNEL);
196                 if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
197                              dp->dccps_hc_tx_ccid == NULL)) {
198                         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
199                         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
200                         if (dmsk->dccpms_send_ack_vector) {
201                                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
202                                 dp->dccps_hc_rx_ackvec = NULL;
203                         }
204                         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
205                         return -ENOMEM;
206                 }
207         } else {
208                 /* control socket doesn't need feat nego */
209                 INIT_LIST_HEAD(&dmsk->dccpms_pending);
210                 INIT_LIST_HEAD(&dmsk->dccpms_conf);
211         }
212
213         dccp_init_xmit_timers(sk);
214         icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
215         sk->sk_state            = DCCP_CLOSED;
216         sk->sk_write_space      = dccp_write_space;
217         icsk->icsk_sync_mss     = dccp_sync_mss;
218         dp->dccps_mss_cache     = 536;
219         dp->dccps_role          = DCCP_ROLE_UNDEFINED;
220         dp->dccps_service       = DCCP_SERVICE_INVALID_VALUE;
221         dp->dccps_l_ack_ratio   = dp->dccps_r_ack_ratio = 1;
222
223         return 0;
224 }
225
226 EXPORT_SYMBOL_GPL(dccp_init_sock);
227
228 int dccp_destroy_sock(struct sock *sk)
229 {
230         struct dccp_sock *dp = dccp_sk(sk);
231         struct dccp_minisock *dmsk = dccp_msk(sk);
232
233         /*
234          * DCCP doesn't use sk_write_queue, just sk_send_head
235          * for retransmissions
236          */
237         if (sk->sk_send_head != NULL) {
238                 kfree_skb(sk->sk_send_head);
239                 sk->sk_send_head = NULL;
240         }
241
242         /* Clean up a referenced DCCP bind bucket. */
243         if (inet_csk(sk)->icsk_bind_hash != NULL)
244                 inet_put_port(&dccp_hashinfo, sk);
245
246         kfree(dp->dccps_service_list);
247         dp->dccps_service_list = NULL;
248
249         if (dmsk->dccpms_send_ack_vector) {
250                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
251                 dp->dccps_hc_rx_ackvec = NULL;
252         }
253         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
254         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
255         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
256
257         /* clean up feature negotiation state */
258         dccp_feat_clean(dmsk);
259
260         return 0;
261 }
262
263 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
264
265 static inline int dccp_listen_start(struct sock *sk)
266 {
267         struct dccp_sock *dp = dccp_sk(sk);
268
269         dp->dccps_role = DCCP_ROLE_LISTEN;
270         /*
271          * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE)
272          * before calling listen()
273          */
274         if (dccp_service_not_initialized(sk))
275                 return -EPROTO;
276         return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
277 }
278
279 int dccp_disconnect(struct sock *sk, int flags)
280 {
281         struct inet_connection_sock *icsk = inet_csk(sk);
282         struct inet_sock *inet = inet_sk(sk);
283         int err = 0;
284         const int old_state = sk->sk_state;
285
286         if (old_state != DCCP_CLOSED)
287                 dccp_set_state(sk, DCCP_CLOSED);
288
289         /* ABORT function of RFC793 */
290         if (old_state == DCCP_LISTEN) {
291                 inet_csk_listen_stop(sk);
292         /* FIXME: do the active reset thing */
293         } else if (old_state == DCCP_REQUESTING)
294                 sk->sk_err = ECONNRESET;
295
296         dccp_clear_xmit_timers(sk);
297         __skb_queue_purge(&sk->sk_receive_queue);
298         if (sk->sk_send_head != NULL) {
299                 __kfree_skb(sk->sk_send_head);
300                 sk->sk_send_head = NULL;
301         }
302
303         inet->dport = 0;
304
305         if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
306                 inet_reset_saddr(sk);
307
308         sk->sk_shutdown = 0;
309         sock_reset_flag(sk, SOCK_DONE);
310
311         icsk->icsk_backoff = 0;
312         inet_csk_delack_init(sk);
313         __sk_dst_reset(sk);
314
315         BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
316
317         sk->sk_error_report(sk);
318         return err;
319 }
320
321 EXPORT_SYMBOL_GPL(dccp_disconnect);
322
323 /*
324  *      Wait for a DCCP event.
325  *
326  *      Note that we don't need to lock the socket, as the upper poll layers
327  *      take care of normal races (between the test and the event) and we don't
328  *      go look at any of the socket buffers directly.
329  */
330 unsigned int dccp_poll(struct file *file, struct socket *sock,
331                        poll_table *wait)
332 {
333         unsigned int mask;
334         struct sock *sk = sock->sk;
335
336         poll_wait(file, sk->sk_sleep, wait);
337         if (sk->sk_state == DCCP_LISTEN)
338                 return inet_csk_listen_poll(sk);
339
340         /* Socket is not locked. We are protected from async events
341            by poll logic and correct handling of state changes
342            made by another threads is impossible in any case.
343          */
344
345         mask = 0;
346         if (sk->sk_err)
347                 mask = POLLERR;
348
349         if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
350                 mask |= POLLHUP;
351         if (sk->sk_shutdown & RCV_SHUTDOWN)
352                 mask |= POLLIN | POLLRDNORM | POLLRDHUP;
353
354         /* Connected? */
355         if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
356                 if (atomic_read(&sk->sk_rmem_alloc) > 0)
357                         mask |= POLLIN | POLLRDNORM;
358
359                 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
360                         if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
361                                 mask |= POLLOUT | POLLWRNORM;
362                         } else {  /* send SIGIO later */
363                                 set_bit(SOCK_ASYNC_NOSPACE,
364                                         &sk->sk_socket->flags);
365                                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
366
367                                 /* Race breaker. If space is freed after
368                                  * wspace test but before the flags are set,
369                                  * IO signal will be lost.
370                                  */
371                                 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
372                                         mask |= POLLOUT | POLLWRNORM;
373                         }
374                 }
375         }
376         return mask;
377 }
378
379 EXPORT_SYMBOL_GPL(dccp_poll);
380
381 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
382 {
383         dccp_pr_debug("entry\n");
384         return -ENOIOCTLCMD;
385 }
386
387 EXPORT_SYMBOL_GPL(dccp_ioctl);
388
389 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
390                                    char __user *optval, int optlen)
391 {
392         struct dccp_sock *dp = dccp_sk(sk);
393         struct dccp_service_list *sl = NULL;
394
395         if (service == DCCP_SERVICE_INVALID_VALUE || 
396             optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
397                 return -EINVAL;
398
399         if (optlen > sizeof(service)) {
400                 sl = kmalloc(optlen, GFP_KERNEL);
401                 if (sl == NULL)
402                         return -ENOMEM;
403
404                 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
405                 if (copy_from_user(sl->dccpsl_list,
406                                    optval + sizeof(service),
407                                    optlen - sizeof(service)) ||
408                     dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
409                         kfree(sl);
410                         return -EFAULT;
411                 }
412         }
413
414         lock_sock(sk);
415         dp->dccps_service = service;
416
417         kfree(dp->dccps_service_list);
418
419         dp->dccps_service_list = sl;
420         release_sock(sk);
421         return 0;
422 }
423
424 /* byte 1 is feature.  the rest is the preference list */
425 static int dccp_setsockopt_change(struct sock *sk, int type,
426                                   struct dccp_so_feat __user *optval)
427 {
428         struct dccp_so_feat opt;
429         u8 *val;
430         int rc;
431
432         if (copy_from_user(&opt, optval, sizeof(opt)))
433                 return -EFAULT;
434
435         val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
436         if (!val)
437                 return -ENOMEM;
438
439         if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
440                 rc = -EFAULT;
441                 goto out_free_val;
442         }
443
444         rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
445                               val, opt.dccpsf_len, GFP_KERNEL);
446         if (rc)
447                 goto out_free_val;
448
449 out:
450         return rc;
451
452 out_free_val:
453         kfree(val);
454         goto out;
455 }
456
457 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
458                 char __user *optval, int optlen)
459 {
460         struct dccp_sock *dp;
461         int err;
462         int val;
463
464         if (optlen < sizeof(int))
465                 return -EINVAL;
466
467         if (get_user(val, (int __user *)optval))
468                 return -EFAULT;
469
470         if (optname == DCCP_SOCKOPT_SERVICE)
471                 return dccp_setsockopt_service(sk, val, optval, optlen);
472
473         lock_sock(sk);
474         dp = dccp_sk(sk);
475         err = 0;
476
477         switch (optname) {
478         case DCCP_SOCKOPT_PACKET_SIZE:
479                 dp->dccps_packet_size = val;
480                 break;
481
482         case DCCP_SOCKOPT_CHANGE_L:
483                 if (optlen != sizeof(struct dccp_so_feat))
484                         err = -EINVAL;
485                 else
486                         err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
487                                                      (struct dccp_so_feat __user *)
488                                                      optval);
489                 break;
490
491         case DCCP_SOCKOPT_CHANGE_R:
492                 if (optlen != sizeof(struct dccp_so_feat))
493                         err = -EINVAL;
494                 else
495                         err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
496                                                      (struct dccp_so_feat __user *)
497                                                      optval);
498                 break;
499
500         default:
501                 err = -ENOPROTOOPT;
502                 break;
503         }
504         
505         release_sock(sk);
506         return err;
507 }
508
509 int dccp_setsockopt(struct sock *sk, int level, int optname,
510                     char __user *optval, int optlen)
511 {
512         if (level != SOL_DCCP)
513                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
514                                                              optname, optval,
515                                                              optlen);
516         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
517 }
518
519 EXPORT_SYMBOL_GPL(dccp_setsockopt);
520
#ifdef CONFIG_COMPAT
/*
 * compat_dccp_setsockopt  -  compat setsockopt() entry point
 *
 * SOL_DCCP options go through the same handler as the native call; other
 * levels are passed to inet_csk_compat_setsockopt().
 */
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int optlen)
{
	if (level == SOL_DCCP)
		return do_dccp_setsockopt(sk, level, optname, optval, optlen);

	return inet_csk_compat_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif
533
534 static int dccp_getsockopt_service(struct sock *sk, int len,
535                                    __be32 __user *optval,
536                                    int __user *optlen)
537 {
538         const struct dccp_sock *dp = dccp_sk(sk);
539         const struct dccp_service_list *sl;
540         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
541
542         lock_sock(sk);
543         if (dccp_service_not_initialized(sk))
544                 goto out;
545
546         if ((sl = dp->dccps_service_list) != NULL) {
547                 slen = sl->dccpsl_nr * sizeof(u32);
548                 total_len += slen;
549         }
550
551         err = -EINVAL;
552         if (total_len > len)
553                 goto out;
554
555         err = 0;
556         if (put_user(total_len, optlen) ||
557             put_user(dp->dccps_service, optval) ||
558             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
559                 err = -EFAULT;
560 out:
561         release_sock(sk);
562         return err;
563 }
564
565 static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
566                     char __user *optval, int __user *optlen)
567 {
568         struct dccp_sock *dp;
569         int val, len;
570
571         if (get_user(len, optlen))
572                 return -EFAULT;
573
574         if (len < sizeof(int))
575                 return -EINVAL;
576
577         dp = dccp_sk(sk);
578
579         switch (optname) {
580         case DCCP_SOCKOPT_PACKET_SIZE:
581                 val = dp->dccps_packet_size;
582                 len = sizeof(dp->dccps_packet_size);
583                 break;
584         case DCCP_SOCKOPT_SERVICE:
585                 return dccp_getsockopt_service(sk, len,
586                                                (__be32 __user *)optval, optlen);
587         case 128 ... 191:
588                 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
589                                              len, (u32 __user *)optval, optlen);
590         case 192 ... 255:
591                 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
592                                              len, (u32 __user *)optval, optlen);
593         default:
594                 return -ENOPROTOOPT;
595         }
596
597         if (put_user(len, optlen) || copy_to_user(optval, &val, len))
598                 return -EFAULT;
599
600         return 0;
601 }
602
603 int dccp_getsockopt(struct sock *sk, int level, int optname,
604                     char __user *optval, int __user *optlen)
605 {
606         if (level != SOL_DCCP)
607                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
608                                                              optname, optval,
609                                                              optlen);
610         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
611 }
612
613 EXPORT_SYMBOL_GPL(dccp_getsockopt);
614
#ifdef CONFIG_COMPAT
/*
 * compat_dccp_getsockopt  -  compat getsockopt() entry point
 *
 * SOL_DCCP options go through the same handler as the native call; other
 * levels are passed to inet_csk_compat_getsockopt().
 */
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	if (level == SOL_DCCP)
		return do_dccp_getsockopt(sk, level, optname, optval, optlen);

	return inet_csk_compat_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif
627
628 int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
629                  size_t len)
630 {
631         const struct dccp_sock *dp = dccp_sk(sk);
632         const int flags = msg->msg_flags;
633         const int noblock = flags & MSG_DONTWAIT;
634         struct sk_buff *skb;
635         int rc, size;
636         long timeo;
637
638         if (len > dp->dccps_mss_cache)
639                 return -EMSGSIZE;
640
641         lock_sock(sk);
642         timeo = sock_sndtimeo(sk, noblock);
643
644         /*
645          * We have to use sk_stream_wait_connect here to set sk_write_pending,
646          * so that the trick in dccp_rcv_request_sent_state_process.
647          */
648         /* Wait for a connection to finish. */
649         if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
650                 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
651                         goto out_release;
652
653         size = sk->sk_prot->max_header + len;
654         release_sock(sk);
655         skb = sock_alloc_send_skb(sk, size, noblock, &rc);
656         lock_sock(sk);
657         if (skb == NULL)
658                 goto out_release;
659
660         skb_reserve(skb, sk->sk_prot->max_header);
661         rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
662         if (rc != 0)
663                 goto out_discard;
664
665         skb_queue_tail(&sk->sk_write_queue, skb);
666         dccp_write_xmit(sk,0);
667 out_release:
668         release_sock(sk);
669         return rc ? : len;
670 out_discard:
671         kfree_skb(skb);
672         goto out_release;
673 }
674
675 EXPORT_SYMBOL_GPL(dccp_sendmsg);
676
677 int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
678                  size_t len, int nonblock, int flags, int *addr_len)
679 {
680         const struct dccp_hdr *dh;
681         long timeo;
682
683         lock_sock(sk);
684
685         if (sk->sk_state == DCCP_LISTEN) {
686                 len = -ENOTCONN;
687                 goto out;
688         }
689
690         timeo = sock_rcvtimeo(sk, nonblock);
691
692         do {
693                 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
694
695                 if (skb == NULL)
696                         goto verify_sock_status;
697
698                 dh = dccp_hdr(skb);
699
700                 if (dh->dccph_type == DCCP_PKT_DATA ||
701                     dh->dccph_type == DCCP_PKT_DATAACK)
702                         goto found_ok_skb;
703
704                 if (dh->dccph_type == DCCP_PKT_RESET ||
705                     dh->dccph_type == DCCP_PKT_CLOSE) {
706                         dccp_pr_debug("found fin ok!\n");
707                         len = 0;
708                         goto found_fin_ok;
709                 }
710                 dccp_pr_debug("packet_type=%s\n",
711                               dccp_packet_name(dh->dccph_type));
712                 sk_eat_skb(sk, skb, 0);
713 verify_sock_status:
714                 if (sock_flag(sk, SOCK_DONE)) {
715                         len = 0;
716                         break;
717                 }
718
719                 if (sk->sk_err) {
720                         len = sock_error(sk);
721                         break;
722                 }
723
724                 if (sk->sk_shutdown & RCV_SHUTDOWN) {
725                         len = 0;
726                         break;
727                 }
728
729                 if (sk->sk_state == DCCP_CLOSED) {
730                         if (!sock_flag(sk, SOCK_DONE)) {
731                                 /* This occurs when user tries to read
732                                  * from never connected socket.
733                                  */
734                                 len = -ENOTCONN;
735                                 break;
736                         }
737                         len = 0;
738                         break;
739                 }
740
741                 if (!timeo) {
742                         len = -EAGAIN;
743                         break;
744                 }
745
746                 if (signal_pending(current)) {
747                         len = sock_intr_errno(timeo);
748                         break;
749                 }
750
751                 sk_wait_data(sk, &timeo);
752                 continue;
753         found_ok_skb:
754                 if (len > skb->len)
755                         len = skb->len;
756                 else if (len < skb->len)
757                         msg->msg_flags |= MSG_TRUNC;
758
759                 if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
760                         /* Exception. Bailout! */
761                         len = -EFAULT;
762                         break;
763                 }
764         found_fin_ok:
765                 if (!(flags & MSG_PEEK))
766                         sk_eat_skb(sk, skb, 0);
767                 break;
768         } while (1);
769 out:
770         release_sock(sk);
771         return len;
772 }
773
774 EXPORT_SYMBOL_GPL(dccp_recvmsg);
775
776 int inet_dccp_listen(struct socket *sock, int backlog)
777 {
778         struct sock *sk = sock->sk;
779         unsigned char old_state;
780         int err;
781
782         lock_sock(sk);
783
784         err = -EINVAL;
785         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
786                 goto out;
787
788         old_state = sk->sk_state;
789         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
790                 goto out;
791
792         /* Really, if the socket is already in listen state
793          * we can only allow the backlog to be adjusted.
794          */
795         if (old_state != DCCP_LISTEN) {
796                 /*
797                  * FIXME: here it probably should be sk->sk_prot->listen_start
798                  * see tcp_listen_start
799                  */
800                 err = dccp_listen_start(sk);
801                 if (err)
802                         goto out;
803         }
804         sk->sk_max_ack_backlog = backlog;
805         err = 0;
806
807 out:
808         release_sock(sk);
809         return err;
810 }
811
812 EXPORT_SYMBOL_GPL(inet_dccp_listen);
813
814 static const unsigned char dccp_new_state[] = {
815         /* current state:   new state:      action:     */
816         [0]               = DCCP_CLOSED,
817         [DCCP_OPEN]       = DCCP_CLOSING | DCCP_ACTION_FIN,
818         [DCCP_REQUESTING] = DCCP_CLOSED,
819         [DCCP_PARTOPEN]   = DCCP_CLOSING | DCCP_ACTION_FIN,
820         [DCCP_LISTEN]     = DCCP_CLOSED,
821         [DCCP_RESPOND]    = DCCP_CLOSED,
822         [DCCP_CLOSING]    = DCCP_CLOSED,
823         [DCCP_TIME_WAIT]  = DCCP_CLOSED,
824         [DCCP_CLOSED]     = DCCP_CLOSED,
825 };
826
827 static int dccp_close_state(struct sock *sk)
828 {
829         const int next = dccp_new_state[sk->sk_state];
830         const int ns = next & DCCP_STATE_MASK;
831
832         if (ns != sk->sk_state)
833                 dccp_set_state(sk, ns);
834
835         return next & DCCP_ACTION_FIN;
836 }
837
838 void dccp_close(struct sock *sk, long timeout)
839 {
840         struct dccp_sock *dp = dccp_sk(sk);
841         struct sk_buff *skb;
842         int state;
843
844         lock_sock(sk);
845
846         sk->sk_shutdown = SHUTDOWN_MASK;
847
848         if (sk->sk_state == DCCP_LISTEN) {
849                 dccp_set_state(sk, DCCP_CLOSED);
850
851                 /* Special case. */
852                 inet_csk_listen_stop(sk);
853
854                 goto adjudge_to_death;
855         }
856
857         sk_stop_timer(sk, &dp->dccps_xmit_timer);
858
859         /*
860          * We need to flush the recv. buffs.  We do this only on the
861          * descriptor close, not protocol-sourced closes, because the
862           *reader process may not have drained the data yet!
863          */
864         /* FIXME: check for unread data */
865         while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
866                 __kfree_skb(skb);
867         }
868
869         if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
870                 /* Check zero linger _after_ checking for unread data. */
871                 sk->sk_prot->disconnect(sk, 0);
872         } else if (dccp_close_state(sk)) {
873                 dccp_send_close(sk, 1);
874         }
875
876         sk_stream_wait_close(sk, timeout);
877
878 adjudge_to_death:
879         state = sk->sk_state;
880         sock_hold(sk);
881         sock_orphan(sk);
882         atomic_inc(sk->sk_prot->orphan_count);
883
884         /*
885          * It is the last release_sock in its life. It will remove backlog.
886          */
887         release_sock(sk);
888         /*
889          * Now socket is owned by kernel and we acquire BH lock
890          * to finish close. No need to check for user refs.
891          */
892         local_bh_disable();
893         bh_lock_sock(sk);
894         BUG_TRAP(!sock_owned_by_user(sk));
895
896         /* Have we already been destroyed by a softirq or backlog? */
897         if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
898                 goto out;
899
900         /*
901          * The last release_sock may have processed the CLOSE or RESET
902          * packet moving sock to CLOSED state, if not we have to fire
903          * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
904          * in draft-ietf-dccp-spec-11. -acme
905          */
906         if (sk->sk_state == DCCP_CLOSING) {
907                 /* FIXME: should start at 2 * RTT */
908                 /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
909                 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
910                                           inet_csk(sk)->icsk_rto,
911                                           DCCP_RTO_MAX);
912 #if 0
913                 /* Yeah, we should use sk->sk_prot->orphan_count, etc */
914                 dccp_set_state(sk, DCCP_CLOSED);
915 #endif
916         }
917
918         if (sk->sk_state == DCCP_CLOSED)
919                 inet_csk_destroy_sock(sk);
920
921         /* Otherwise, socket is reprieved until protocol close. */
922
923 out:
924         bh_unlock_sock(sk);
925         local_bh_enable();
926         sock_put(sk);
927 }
928
929 EXPORT_SYMBOL_GPL(dccp_close);
930
/*
 * dccp_shutdown - shutdown hook for DCCP sockets (currently a stub)
 * @sk:  socket the call refers to (not used here)
 * @how: shutdown argument passed by the caller (not used here)
 *
 * Only emits a debug trace; no socket state is touched.
 */
void dccp_shutdown(struct sock *sk, int how)
{
        dccp_pr_debug("entry\n");
}
935
936 EXPORT_SYMBOL_GPL(dccp_shutdown);
937
938 static int __init dccp_mib_init(void)
939 {
940         int rc = -ENOMEM;
941
942         dccp_statistics[0] = alloc_percpu(struct dccp_mib);
943         if (dccp_statistics[0] == NULL)
944                 goto out;
945
946         dccp_statistics[1] = alloc_percpu(struct dccp_mib);
947         if (dccp_statistics[1] == NULL)
948                 goto out_free_one;
949
950         rc = 0;
951 out:
952         return rc;
953 out_free_one:
954         free_percpu(dccp_statistics[0]);
955         dccp_statistics[0] = NULL;
956         goto out;
957
958 }
959
960 static void dccp_mib_exit(void)
961 {
962         free_percpu(dccp_statistics[0]);
963         free_percpu(dccp_statistics[1]);
964         dccp_statistics[0] = dccp_statistics[1] = NULL;
965 }
966
/*
 * Optional module parameter: when set, dccp_init() sizes the established
 * hash table from this bucket count instead of from the amount of
 * physical memory.  Parameter permissions are 0444.
 */
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
/*
 * Debug-message switch; only built when CONFIG_IP_DCCP_DEBUG is set.
 * Exported (GPL-only) so that code outside this file can reference it.
 */
int dccp_debug;
module_param(dccp_debug, int, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif
978
979 static int __init dccp_init(void)
980 {
981         unsigned long goal;
982         int ehash_order, bhash_order, i;
983         int rc = -ENOBUFS;
984
985         dccp_hashinfo.bind_bucket_cachep =
986                 kmem_cache_create("dccp_bind_bucket",
987                                   sizeof(struct inet_bind_bucket), 0,
988                                   SLAB_HWCACHE_ALIGN, NULL, NULL);
989         if (!dccp_hashinfo.bind_bucket_cachep)
990                 goto out;
991
992         /*
993          * Size and allocate the main established and bind bucket
994          * hash tables.
995          *
996          * The methodology is similar to that of the buffer cache.
997          */
998         if (num_physpages >= (128 * 1024))
999                 goal = num_physpages >> (21 - PAGE_SHIFT);
1000         else
1001                 goal = num_physpages >> (23 - PAGE_SHIFT);
1002
1003         if (thash_entries)
1004                 goal = (thash_entries *
1005                         sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1006         for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1007                 ;
1008         do {
1009                 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
1010                                         sizeof(struct inet_ehash_bucket);
1011                 dccp_hashinfo.ehash_size >>= 1;
1012                 while (dccp_hashinfo.ehash_size &
1013                        (dccp_hashinfo.ehash_size - 1))
1014                         dccp_hashinfo.ehash_size--;
1015                 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1016                         __get_free_pages(GFP_ATOMIC, ehash_order);
1017         } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1018
1019         if (!dccp_hashinfo.ehash) {
1020                 printk(KERN_CRIT "Failed to allocate DCCP "
1021                                  "established hash table\n");
1022                 goto out_free_bind_bucket_cachep;
1023         }
1024
1025         for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
1026                 rwlock_init(&dccp_hashinfo.ehash[i].lock);
1027                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
1028         }
1029
1030         bhash_order = ehash_order;
1031
1032         do {
1033                 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1034                                         sizeof(struct inet_bind_hashbucket);
1035                 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1036                     bhash_order > 0)
1037                         continue;
1038                 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1039                         __get_free_pages(GFP_ATOMIC, bhash_order);
1040         } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1041
1042         if (!dccp_hashinfo.bhash) {
1043                 printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
1044                 goto out_free_dccp_ehash;
1045         }
1046
1047         for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1048                 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1049                 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1050         }
1051
1052         rc = dccp_mib_init();
1053         if (rc)
1054                 goto out_free_dccp_bhash;
1055
1056         rc = dccp_ackvec_init();
1057         if (rc)
1058                 goto out_free_dccp_mib;
1059
1060         rc = dccp_sysctl_init();
1061         if (rc)
1062                 goto out_ackvec_exit;
1063 out:
1064         return rc;
1065 out_ackvec_exit:
1066         dccp_ackvec_exit();
1067 out_free_dccp_mib:
1068         dccp_mib_exit();
1069 out_free_dccp_bhash:
1070         free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1071         dccp_hashinfo.bhash = NULL;
1072 out_free_dccp_ehash:
1073         free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1074         dccp_hashinfo.ehash = NULL;
1075 out_free_bind_bucket_cachep:
1076         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1077         dccp_hashinfo.bind_bucket_cachep = NULL;
1078         goto out;
1079 }
1080
1081 static void __exit dccp_fini(void)
1082 {
1083         dccp_mib_exit();
1084         free_pages((unsigned long)dccp_hashinfo.bhash,
1085                    get_order(dccp_hashinfo.bhash_size *
1086                              sizeof(struct inet_bind_hashbucket)));
1087         free_pages((unsigned long)dccp_hashinfo.ehash,
1088                    get_order(dccp_hashinfo.ehash_size *
1089                              sizeof(struct inet_ehash_bucket)));
1090         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1091         dccp_ackvec_exit();
1092         dccp_sysctl_exit();
1093 }
1094
/* Module entry and exit points. */
module_init(dccp_init);
module_exit(dccp_fini);

/* Module metadata. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");