]> bbs.cooldavid.org Git - net-next-2.6.git/blob - net/dccp/proto.c
dccp: Query supported CCIDs
[net-next-2.6.git] / net / dccp / proto.c
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
24
25 #include <net/inet_sock.h>
26 #include <net/sock.h>
27 #include <net/xfrm.h>
28
29 #include <asm/ioctls.h>
30 #include <linux/spinlock.h>
31 #include <linux/timer.h>
32 #include <linux/delay.h>
33 #include <linux/poll.h>
34
35 #include "ccid.h"
36 #include "dccp.h"
37 #include "feat.h"
38
39 DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
40
41 EXPORT_SYMBOL_GPL(dccp_statistics);
42
43 atomic_t dccp_orphan_count = ATOMIC_INIT(0);
44
45 EXPORT_SYMBOL_GPL(dccp_orphan_count);
46
47 struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
48         .lhash_lock     = RW_LOCK_UNLOCKED,
49         .lhash_users    = ATOMIC_INIT(0),
50         .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
51 };
52
53 EXPORT_SYMBOL_GPL(dccp_hashinfo);
54
55 /* the maximum queue length for tx in packets. 0 is no limit */
56 int sysctl_dccp_tx_qlen __read_mostly = 5;
57
58 void dccp_set_state(struct sock *sk, const int state)
59 {
60         const int oldstate = sk->sk_state;
61
62         dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
63                       dccp_state_name(oldstate), dccp_state_name(state));
64         WARN_ON(state == oldstate);
65
66         switch (state) {
67         case DCCP_OPEN:
68                 if (oldstate != DCCP_OPEN)
69                         DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
70                 break;
71
72         case DCCP_CLOSED:
73                 if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
74                     oldstate == DCCP_CLOSING)
75                         DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
76
77                 sk->sk_prot->unhash(sk);
78                 if (inet_csk(sk)->icsk_bind_hash != NULL &&
79                     !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
80                         inet_put_port(sk);
81                 /* fall through */
82         default:
83                 if (oldstate == DCCP_OPEN)
84                         DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
85         }
86
87         /* Change state AFTER socket is unhashed to avoid closed
88          * socket sitting in hash tables.
89          */
90         sk->sk_state = state;
91 }
92
93 EXPORT_SYMBOL_GPL(dccp_set_state);
94
95 static void dccp_finish_passive_close(struct sock *sk)
96 {
97         switch (sk->sk_state) {
98         case DCCP_PASSIVE_CLOSE:
99                 /* Node (client or server) has received Close packet. */
100                 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
101                 dccp_set_state(sk, DCCP_CLOSED);
102                 break;
103         case DCCP_PASSIVE_CLOSEREQ:
104                 /*
105                  * Client received CloseReq. We set the `active' flag so that
106                  * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
107                  */
108                 dccp_send_close(sk, 1);
109                 dccp_set_state(sk, DCCP_CLOSING);
110         }
111 }
112
113 void dccp_done(struct sock *sk)
114 {
115         dccp_set_state(sk, DCCP_CLOSED);
116         dccp_clear_xmit_timers(sk);
117
118         sk->sk_shutdown = SHUTDOWN_MASK;
119
120         if (!sock_flag(sk, SOCK_DEAD))
121                 sk->sk_state_change(sk);
122         else
123                 inet_csk_destroy_sock(sk);
124 }
125
126 EXPORT_SYMBOL_GPL(dccp_done);
127
128 const char *dccp_packet_name(const int type)
129 {
130         static const char *dccp_packet_names[] = {
131                 [DCCP_PKT_REQUEST]  = "REQUEST",
132                 [DCCP_PKT_RESPONSE] = "RESPONSE",
133                 [DCCP_PKT_DATA]     = "DATA",
134                 [DCCP_PKT_ACK]      = "ACK",
135                 [DCCP_PKT_DATAACK]  = "DATAACK",
136                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
137                 [DCCP_PKT_CLOSE]    = "CLOSE",
138                 [DCCP_PKT_RESET]    = "RESET",
139                 [DCCP_PKT_SYNC]     = "SYNC",
140                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
141         };
142
143         if (type >= DCCP_NR_PKT_TYPES)
144                 return "INVALID";
145         else
146                 return dccp_packet_names[type];
147 }
148
149 EXPORT_SYMBOL_GPL(dccp_packet_name);
150
151 const char *dccp_state_name(const int state)
152 {
153         static char *dccp_state_names[] = {
154         [DCCP_OPEN]             = "OPEN",
155         [DCCP_REQUESTING]       = "REQUESTING",
156         [DCCP_PARTOPEN]         = "PARTOPEN",
157         [DCCP_LISTEN]           = "LISTEN",
158         [DCCP_RESPOND]          = "RESPOND",
159         [DCCP_CLOSING]          = "CLOSING",
160         [DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
161         [DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
162         [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
163         [DCCP_TIME_WAIT]        = "TIME_WAIT",
164         [DCCP_CLOSED]           = "CLOSED",
165         };
166
167         if (state >= DCCP_MAX_STATES)
168                 return "INVALID STATE!";
169         else
170                 return dccp_state_names[state];
171 }
172
173 EXPORT_SYMBOL_GPL(dccp_state_name);
174
175 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
176 {
177         struct dccp_sock *dp = dccp_sk(sk);
178         struct dccp_minisock *dmsk = dccp_msk(sk);
179         struct inet_connection_sock *icsk = inet_csk(sk);
180
181         dccp_minisock_init(&dp->dccps_minisock);
182
183         icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
184         icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
185         sk->sk_state            = DCCP_CLOSED;
186         sk->sk_write_space      = dccp_write_space;
187         icsk->icsk_sync_mss     = dccp_sync_mss;
188         dp->dccps_mss_cache     = 536;
189         dp->dccps_rate_last     = jiffies;
190         dp->dccps_role          = DCCP_ROLE_UNDEFINED;
191         dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
192         dp->dccps_l_ack_ratio   = dp->dccps_r_ack_ratio = 1;
193
194         dccp_init_xmit_timers(sk);
195
196         INIT_LIST_HEAD(&dp->dccps_featneg);
197         /*
198          * FIXME: We're hardcoding the CCID, and doing this at this point makes
199          * the listening (master) sock get CCID control blocks, which is not
200          * necessary, but for now, to not mess with the test userspace apps,
201          * lets leave it here, later the real solution is to do this in a
202          * setsockopt(CCIDs-I-want/accept). -acme
203          */
204         if (likely(ctl_sock_initialized)) {
205                 int rc = dccp_feat_init(sk);
206
207                 if (rc)
208                         return rc;
209
210                 if (dmsk->dccpms_send_ack_vector) {
211                         dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
212                         if (dp->dccps_hc_rx_ackvec == NULL)
213                                 return -ENOMEM;
214                 }
215                 dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
216                                                       sk, GFP_KERNEL);
217                 dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
218                                                       sk, GFP_KERNEL);
219                 if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
220                              dp->dccps_hc_tx_ccid == NULL)) {
221                         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
222                         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
223                         if (dmsk->dccpms_send_ack_vector) {
224                                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
225                                 dp->dccps_hc_rx_ackvec = NULL;
226                         }
227                         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
228                         return -ENOMEM;
229                 }
230         } else {
231                 /* control socket doesn't need feat nego */
232                 INIT_LIST_HEAD(&dmsk->dccpms_pending);
233                 INIT_LIST_HEAD(&dmsk->dccpms_conf);
234         }
235
236         return 0;
237 }
238
239 EXPORT_SYMBOL_GPL(dccp_init_sock);
240
241 void dccp_destroy_sock(struct sock *sk)
242 {
243         struct dccp_sock *dp = dccp_sk(sk);
244         struct dccp_minisock *dmsk = dccp_msk(sk);
245
246         /*
247          * DCCP doesn't use sk_write_queue, just sk_send_head
248          * for retransmissions
249          */
250         if (sk->sk_send_head != NULL) {
251                 kfree_skb(sk->sk_send_head);
252                 sk->sk_send_head = NULL;
253         }
254
255         /* Clean up a referenced DCCP bind bucket. */
256         if (inet_csk(sk)->icsk_bind_hash != NULL)
257                 inet_put_port(sk);
258
259         kfree(dp->dccps_service_list);
260         dp->dccps_service_list = NULL;
261
262         if (dmsk->dccpms_send_ack_vector) {
263                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
264                 dp->dccps_hc_rx_ackvec = NULL;
265         }
266         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
267         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
268         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
269
270         /* clean up feature negotiation state */
271         dccp_feat_list_purge(&dp->dccps_featneg);
272 }
273
274 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
275
276 static inline int dccp_listen_start(struct sock *sk, int backlog)
277 {
278         struct dccp_sock *dp = dccp_sk(sk);
279
280         dp->dccps_role = DCCP_ROLE_LISTEN;
281         return inet_csk_listen_start(sk, backlog);
282 }
283
284 static inline int dccp_need_reset(int state)
285 {
286         return state != DCCP_CLOSED && state != DCCP_LISTEN &&
287                state != DCCP_REQUESTING;
288 }
289
290 int dccp_disconnect(struct sock *sk, int flags)
291 {
292         struct inet_connection_sock *icsk = inet_csk(sk);
293         struct inet_sock *inet = inet_sk(sk);
294         int err = 0;
295         const int old_state = sk->sk_state;
296
297         if (old_state != DCCP_CLOSED)
298                 dccp_set_state(sk, DCCP_CLOSED);
299
300         /*
301          * This corresponds to the ABORT function of RFC793, sec. 3.8
302          * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
303          */
304         if (old_state == DCCP_LISTEN) {
305                 inet_csk_listen_stop(sk);
306         } else if (dccp_need_reset(old_state)) {
307                 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
308                 sk->sk_err = ECONNRESET;
309         } else if (old_state == DCCP_REQUESTING)
310                 sk->sk_err = ECONNRESET;
311
312         dccp_clear_xmit_timers(sk);
313
314         __skb_queue_purge(&sk->sk_receive_queue);
315         __skb_queue_purge(&sk->sk_write_queue);
316         if (sk->sk_send_head != NULL) {
317                 __kfree_skb(sk->sk_send_head);
318                 sk->sk_send_head = NULL;
319         }
320
321         inet->dport = 0;
322
323         if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
324                 inet_reset_saddr(sk);
325
326         sk->sk_shutdown = 0;
327         sock_reset_flag(sk, SOCK_DONE);
328
329         icsk->icsk_backoff = 0;
330         inet_csk_delack_init(sk);
331         __sk_dst_reset(sk);
332
333         WARN_ON(inet->num && !icsk->icsk_bind_hash);
334
335         sk->sk_error_report(sk);
336         return err;
337 }
338
339 EXPORT_SYMBOL_GPL(dccp_disconnect);
340
341 /*
342  *      Wait for a DCCP event.
343  *
344  *      Note that we don't need to lock the socket, as the upper poll layers
345  *      take care of normal races (between the test and the event) and we don't
346  *      go look at any of the socket buffers directly.
347  */
348 unsigned int dccp_poll(struct file *file, struct socket *sock,
349                        poll_table *wait)
350 {
351         unsigned int mask;
352         struct sock *sk = sock->sk;
353
354         poll_wait(file, sk->sk_sleep, wait);
355         if (sk->sk_state == DCCP_LISTEN)
356                 return inet_csk_listen_poll(sk);
357
358         /* Socket is not locked. We are protected from async events
359            by poll logic and correct handling of state changes
360            made by another threads is impossible in any case.
361          */
362
363         mask = 0;
364         if (sk->sk_err)
365                 mask = POLLERR;
366
367         if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
368                 mask |= POLLHUP;
369         if (sk->sk_shutdown & RCV_SHUTDOWN)
370                 mask |= POLLIN | POLLRDNORM | POLLRDHUP;
371
372         /* Connected? */
373         if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
374                 if (atomic_read(&sk->sk_rmem_alloc) > 0)
375                         mask |= POLLIN | POLLRDNORM;
376
377                 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
378                         if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
379                                 mask |= POLLOUT | POLLWRNORM;
380                         } else {  /* send SIGIO later */
381                                 set_bit(SOCK_ASYNC_NOSPACE,
382                                         &sk->sk_socket->flags);
383                                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
384
385                                 /* Race breaker. If space is freed after
386                                  * wspace test but before the flags are set,
387                                  * IO signal will be lost.
388                                  */
389                                 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
390                                         mask |= POLLOUT | POLLWRNORM;
391                         }
392                 }
393         }
394         return mask;
395 }
396
397 EXPORT_SYMBOL_GPL(dccp_poll);
398
399 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
400 {
401         int rc = -ENOTCONN;
402
403         lock_sock(sk);
404
405         if (sk->sk_state == DCCP_LISTEN)
406                 goto out;
407
408         switch (cmd) {
409         case SIOCINQ: {
410                 struct sk_buff *skb;
411                 unsigned long amount = 0;
412
413                 skb = skb_peek(&sk->sk_receive_queue);
414                 if (skb != NULL) {
415                         /*
416                          * We will only return the amount of this packet since
417                          * that is all that will be read.
418                          */
419                         amount = skb->len;
420                 }
421                 rc = put_user(amount, (int __user *)arg);
422         }
423                 break;
424         default:
425                 rc = -ENOIOCTLCMD;
426                 break;
427         }
428 out:
429         release_sock(sk);
430         return rc;
431 }
432
433 EXPORT_SYMBOL_GPL(dccp_ioctl);
434
435 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
436                                    char __user *optval, int optlen)
437 {
438         struct dccp_sock *dp = dccp_sk(sk);
439         struct dccp_service_list *sl = NULL;
440
441         if (service == DCCP_SERVICE_INVALID_VALUE ||
442             optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
443                 return -EINVAL;
444
445         if (optlen > sizeof(service)) {
446                 sl = kmalloc(optlen, GFP_KERNEL);
447                 if (sl == NULL)
448                         return -ENOMEM;
449
450                 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
451                 if (copy_from_user(sl->dccpsl_list,
452                                    optval + sizeof(service),
453                                    optlen - sizeof(service)) ||
454                     dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
455                         kfree(sl);
456                         return -EFAULT;
457                 }
458         }
459
460         lock_sock(sk);
461         dp->dccps_service = service;
462
463         kfree(dp->dccps_service_list);
464
465         dp->dccps_service_list = sl;
466         release_sock(sk);
467         return 0;
468 }
469
470 /* byte 1 is feature.  the rest is the preference list */
471 static int dccp_setsockopt_change(struct sock *sk, int type,
472                                   struct dccp_so_feat __user *optval)
473 {
474         struct dccp_so_feat opt;
475         u8 *val;
476         int rc;
477
478         if (copy_from_user(&opt, optval, sizeof(opt)))
479                 return -EFAULT;
480         /*
481          * rfc4340: 6.1. Change Options
482          */
483         if (opt.dccpsf_len < 1)
484                 return -EINVAL;
485
486         val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
487         if (!val)
488                 return -ENOMEM;
489
490         if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
491                 rc = -EFAULT;
492                 goto out_free_val;
493         }
494
495         rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
496                               val, opt.dccpsf_len, GFP_KERNEL);
497         if (rc)
498                 goto out_free_val;
499
500 out:
501         return rc;
502
503 out_free_val:
504         kfree(val);
505         goto out;
506 }
507
508 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
509                 char __user *optval, int optlen)
510 {
511         struct dccp_sock *dp = dccp_sk(sk);
512         int val, err = 0;
513
514         if (optlen < sizeof(int))
515                 return -EINVAL;
516
517         if (get_user(val, (int __user *)optval))
518                 return -EFAULT;
519
520         if (optname == DCCP_SOCKOPT_SERVICE)
521                 return dccp_setsockopt_service(sk, val, optval, optlen);
522
523         lock_sock(sk);
524         switch (optname) {
525         case DCCP_SOCKOPT_PACKET_SIZE:
526                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
527                 err = 0;
528                 break;
529         case DCCP_SOCKOPT_CHANGE_L:
530                 if (optlen != sizeof(struct dccp_so_feat))
531                         err = -EINVAL;
532                 else
533                         err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
534                                                      (struct dccp_so_feat __user *)
535                                                      optval);
536                 break;
537         case DCCP_SOCKOPT_CHANGE_R:
538                 if (optlen != sizeof(struct dccp_so_feat))
539                         err = -EINVAL;
540                 else
541                         err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
542                                                      (struct dccp_so_feat __user *)
543                                                      optval);
544                 break;
545         case DCCP_SOCKOPT_SERVER_TIMEWAIT:
546                 if (dp->dccps_role != DCCP_ROLE_SERVER)
547                         err = -EOPNOTSUPP;
548                 else
549                         dp->dccps_server_timewait = (val != 0);
550                 break;
551         case DCCP_SOCKOPT_SEND_CSCOV:   /* sender side, RFC 4340, sec. 9.2 */
552                 if (val < 0 || val > 15)
553                         err = -EINVAL;
554                 else
555                         dp->dccps_pcslen = val;
556                 break;
557         case DCCP_SOCKOPT_RECV_CSCOV:   /* receiver side, RFC 4340 sec. 9.2.1 */
558                 if (val < 0 || val > 15)
559                         err = -EINVAL;
560                 else {
561                         dp->dccps_pcrlen = val;
562                         /* FIXME: add feature negotiation,
563                          * ChangeL(MinimumChecksumCoverage, val) */
564                 }
565                 break;
566         default:
567                 err = -ENOPROTOOPT;
568                 break;
569         }
570
571         release_sock(sk);
572         return err;
573 }
574
575 int dccp_setsockopt(struct sock *sk, int level, int optname,
576                     char __user *optval, int optlen)
577 {
578         if (level != SOL_DCCP)
579                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
580                                                              optname, optval,
581                                                              optlen);
582         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
583 }
584
585 EXPORT_SYMBOL_GPL(dccp_setsockopt);
586
#ifdef CONFIG_COMPAT
/*
 * Compat setsockopt entry point: SOL_DCCP options are handled locally, every
 * other level goes to the generic compat handler.
 */
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
                           char __user *optval, int optlen)
{
        if (level == SOL_DCCP)
                return do_dccp_setsockopt(sk, level, optname, optval, optlen);

        return inet_csk_compat_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif
599
600 static int dccp_getsockopt_service(struct sock *sk, int len,
601                                    __be32 __user *optval,
602                                    int __user *optlen)
603 {
604         const struct dccp_sock *dp = dccp_sk(sk);
605         const struct dccp_service_list *sl;
606         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
607
608         lock_sock(sk);
609         if ((sl = dp->dccps_service_list) != NULL) {
610                 slen = sl->dccpsl_nr * sizeof(u32);
611                 total_len += slen;
612         }
613
614         err = -EINVAL;
615         if (total_len > len)
616                 goto out;
617
618         err = 0;
619         if (put_user(total_len, optlen) ||
620             put_user(dp->dccps_service, optval) ||
621             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
622                 err = -EFAULT;
623 out:
624         release_sock(sk);
625         return err;
626 }
627
628 static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
629                     char __user *optval, int __user *optlen)
630 {
631         struct dccp_sock *dp;
632         int val, len;
633
634         if (get_user(len, optlen))
635                 return -EFAULT;
636
637         if (len < (int)sizeof(int))
638                 return -EINVAL;
639
640         dp = dccp_sk(sk);
641
642         switch (optname) {
643         case DCCP_SOCKOPT_PACKET_SIZE:
644                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
645                 return 0;
646         case DCCP_SOCKOPT_SERVICE:
647                 return dccp_getsockopt_service(sk, len,
648                                                (__be32 __user *)optval, optlen);
649         case DCCP_SOCKOPT_GET_CUR_MPS:
650                 val = dp->dccps_mss_cache;
651                 break;
652         case DCCP_SOCKOPT_AVAILABLE_CCIDS:
653                 return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
654         case DCCP_SOCKOPT_SERVER_TIMEWAIT:
655                 val = dp->dccps_server_timewait;
656                 break;
657         case DCCP_SOCKOPT_SEND_CSCOV:
658                 val = dp->dccps_pcslen;
659                 break;
660         case DCCP_SOCKOPT_RECV_CSCOV:
661                 val = dp->dccps_pcrlen;
662                 break;
663         case 128 ... 191:
664                 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
665                                              len, (u32 __user *)optval, optlen);
666         case 192 ... 255:
667                 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
668                                              len, (u32 __user *)optval, optlen);
669         default:
670                 return -ENOPROTOOPT;
671         }
672
673         len = sizeof(val);
674         if (put_user(len, optlen) || copy_to_user(optval, &val, len))
675                 return -EFAULT;
676
677         return 0;
678 }
679
680 int dccp_getsockopt(struct sock *sk, int level, int optname,
681                     char __user *optval, int __user *optlen)
682 {
683         if (level != SOL_DCCP)
684                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
685                                                              optname, optval,
686                                                              optlen);
687         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
688 }
689
690 EXPORT_SYMBOL_GPL(dccp_getsockopt);
691
#ifdef CONFIG_COMPAT
/*
 * Compat getsockopt entry point: SOL_DCCP options are handled locally, every
 * other level goes to the generic compat handler.
 */
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
                           char __user *optval, int __user *optlen)
{
        if (level == SOL_DCCP)
                return do_dccp_getsockopt(sk, level, optname, optval, optlen);

        return inet_csk_compat_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif
704
705 int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
706                  size_t len)
707 {
708         const struct dccp_sock *dp = dccp_sk(sk);
709         const int flags = msg->msg_flags;
710         const int noblock = flags & MSG_DONTWAIT;
711         struct sk_buff *skb;
712         int rc, size;
713         long timeo;
714
715         if (len > dp->dccps_mss_cache)
716                 return -EMSGSIZE;
717
718         lock_sock(sk);
719
720         if (sysctl_dccp_tx_qlen &&
721             (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
722                 rc = -EAGAIN;
723                 goto out_release;
724         }
725
726         timeo = sock_sndtimeo(sk, noblock);
727
728         /*
729          * We have to use sk_stream_wait_connect here to set sk_write_pending,
730          * so that the trick in dccp_rcv_request_sent_state_process.
731          */
732         /* Wait for a connection to finish. */
733         if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
734                 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
735                         goto out_release;
736
737         size = sk->sk_prot->max_header + len;
738         release_sock(sk);
739         skb = sock_alloc_send_skb(sk, size, noblock, &rc);
740         lock_sock(sk);
741         if (skb == NULL)
742                 goto out_release;
743
744         skb_reserve(skb, sk->sk_prot->max_header);
745         rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
746         if (rc != 0)
747                 goto out_discard;
748
749         skb_queue_tail(&sk->sk_write_queue, skb);
750         dccp_write_xmit(sk,0);
751 out_release:
752         release_sock(sk);
753         return rc ? : len;
754 out_discard:
755         kfree_skb(skb);
756         goto out_release;
757 }
758
759 EXPORT_SYMBOL_GPL(dccp_sendmsg);
760
761 int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
762                  size_t len, int nonblock, int flags, int *addr_len)
763 {
764         const struct dccp_hdr *dh;
765         long timeo;
766
767         lock_sock(sk);
768
769         if (sk->sk_state == DCCP_LISTEN) {
770                 len = -ENOTCONN;
771                 goto out;
772         }
773
774         timeo = sock_rcvtimeo(sk, nonblock);
775
776         do {
777                 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
778
779                 if (skb == NULL)
780                         goto verify_sock_status;
781
782                 dh = dccp_hdr(skb);
783
784                 switch (dh->dccph_type) {
785                 case DCCP_PKT_DATA:
786                 case DCCP_PKT_DATAACK:
787                         goto found_ok_skb;
788
789                 case DCCP_PKT_CLOSE:
790                 case DCCP_PKT_CLOSEREQ:
791                         if (!(flags & MSG_PEEK))
792                                 dccp_finish_passive_close(sk);
793                         /* fall through */
794                 case DCCP_PKT_RESET:
795                         dccp_pr_debug("found fin (%s) ok!\n",
796                                       dccp_packet_name(dh->dccph_type));
797                         len = 0;
798                         goto found_fin_ok;
799                 default:
800                         dccp_pr_debug("packet_type=%s\n",
801                                       dccp_packet_name(dh->dccph_type));
802                         sk_eat_skb(sk, skb, 0);
803                 }
804 verify_sock_status:
805                 if (sock_flag(sk, SOCK_DONE)) {
806                         len = 0;
807                         break;
808                 }
809
810                 if (sk->sk_err) {
811                         len = sock_error(sk);
812                         break;
813                 }
814
815                 if (sk->sk_shutdown & RCV_SHUTDOWN) {
816                         len = 0;
817                         break;
818                 }
819
820                 if (sk->sk_state == DCCP_CLOSED) {
821                         if (!sock_flag(sk, SOCK_DONE)) {
822                                 /* This occurs when user tries to read
823                                  * from never connected socket.
824                                  */
825                                 len = -ENOTCONN;
826                                 break;
827                         }
828                         len = 0;
829                         break;
830                 }
831
832                 if (!timeo) {
833                         len = -EAGAIN;
834                         break;
835                 }
836
837                 if (signal_pending(current)) {
838                         len = sock_intr_errno(timeo);
839                         break;
840                 }
841
842                 sk_wait_data(sk, &timeo);
843                 continue;
844         found_ok_skb:
845                 if (len > skb->len)
846                         len = skb->len;
847                 else if (len < skb->len)
848                         msg->msg_flags |= MSG_TRUNC;
849
850                 if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
851                         /* Exception. Bailout! */
852                         len = -EFAULT;
853                         break;
854                 }
855         found_fin_ok:
856                 if (!(flags & MSG_PEEK))
857                         sk_eat_skb(sk, skb, 0);
858                 break;
859         } while (1);
860 out:
861         release_sock(sk);
862         return len;
863 }
864
865 EXPORT_SYMBOL_GPL(dccp_recvmsg);
866
867 int inet_dccp_listen(struct socket *sock, int backlog)
868 {
869         struct sock *sk = sock->sk;
870         unsigned char old_state;
871         int err;
872
873         lock_sock(sk);
874
875         err = -EINVAL;
876         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
877                 goto out;
878
879         old_state = sk->sk_state;
880         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
881                 goto out;
882
883         /* Really, if the socket is already in listen state
884          * we can only allow the backlog to be adjusted.
885          */
886         if (old_state != DCCP_LISTEN) {
887                 /*
888                  * FIXME: here it probably should be sk->sk_prot->listen_start
889                  * see tcp_listen_start
890                  */
891                 err = dccp_listen_start(sk, backlog);
892                 if (err)
893                         goto out;
894         }
895         sk->sk_max_ack_backlog = backlog;
896         err = 0;
897
898 out:
899         release_sock(sk);
900         return err;
901 }
902
903 EXPORT_SYMBOL_GPL(inet_dccp_listen);
904
905 static void dccp_terminate_connection(struct sock *sk)
906 {
907         u8 next_state = DCCP_CLOSED;
908
909         switch (sk->sk_state) {
910         case DCCP_PASSIVE_CLOSE:
911         case DCCP_PASSIVE_CLOSEREQ:
912                 dccp_finish_passive_close(sk);
913                 break;
914         case DCCP_PARTOPEN:
915                 dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
916                 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
917                 /* fall through */
918         case DCCP_OPEN:
919                 dccp_send_close(sk, 1);
920
921                 if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
922                     !dccp_sk(sk)->dccps_server_timewait)
923                         next_state = DCCP_ACTIVE_CLOSEREQ;
924                 else
925                         next_state = DCCP_CLOSING;
926                 /* fall through */
927         default:
928                 dccp_set_state(sk, next_state);
929         }
930 }
931
932 void dccp_close(struct sock *sk, long timeout)
933 {
934         struct dccp_sock *dp = dccp_sk(sk);
935         struct sk_buff *skb;
936         u32 data_was_unread = 0;
937         int state;
938
939         lock_sock(sk);
940
941         sk->sk_shutdown = SHUTDOWN_MASK;
942
943         if (sk->sk_state == DCCP_LISTEN) {
944                 dccp_set_state(sk, DCCP_CLOSED);
945
946                 /* Special case. */
947                 inet_csk_listen_stop(sk);
948
949                 goto adjudge_to_death;
950         }
951
952         sk_stop_timer(sk, &dp->dccps_xmit_timer);
953
954         /*
955          * We need to flush the recv. buffs.  We do this only on the
956          * descriptor close, not protocol-sourced closes, because the
957           *reader process may not have drained the data yet!
958          */
959         while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
960                 data_was_unread += skb->len;
961                 __kfree_skb(skb);
962         }
963
964         if (data_was_unread) {
965                 /* Unread data was tossed, send an appropriate Reset Code */
966                 DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
967                 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
968                 dccp_set_state(sk, DCCP_CLOSED);
969         } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
970                 /* Check zero linger _after_ checking for unread data. */
971                 sk->sk_prot->disconnect(sk, 0);
972         } else if (sk->sk_state != DCCP_CLOSED) {
973                 dccp_terminate_connection(sk);
974         }
975
976         sk_stream_wait_close(sk, timeout);
977
978 adjudge_to_death:
979         state = sk->sk_state;
980         sock_hold(sk);
981         sock_orphan(sk);
982         atomic_inc(sk->sk_prot->orphan_count);
983
984         /*
985          * It is the last release_sock in its life. It will remove backlog.
986          */
987         release_sock(sk);
988         /*
989          * Now socket is owned by kernel and we acquire BH lock
990          * to finish close. No need to check for user refs.
991          */
992         local_bh_disable();
993         bh_lock_sock(sk);
994         WARN_ON(sock_owned_by_user(sk));
995
996         /* Have we already been destroyed by a softirq or backlog? */
997         if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
998                 goto out;
999
1000         if (sk->sk_state == DCCP_CLOSED)
1001                 inet_csk_destroy_sock(sk);
1002
1003         /* Otherwise, socket is reprieved until protocol close. */
1004
1005 out:
1006         bh_unlock_sock(sk);
1007         local_bh_enable();
1008         sock_put(sk);
1009 }
1010
1011 EXPORT_SYMBOL_GPL(dccp_close);
1012
/*
 * shutdown() handler. Currently a stub: it only emits a debug message with
 * the requested @how value and does not touch @sk.
 */
void dccp_shutdown(struct sock *sk, int how)
{
        dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
1019
1020 static inline int dccp_mib_init(void)
1021 {
1022         return snmp_mib_init((void**)dccp_statistics, sizeof(struct dccp_mib));
1023 }
1024
1025 static inline void dccp_mib_exit(void)
1026 {
1027         snmp_mib_free((void**)dccp_statistics);
1028 }
1029
/*
 * Optional, read-only (0444) override for the size of the established hash
 * table; when non-zero dccp_init() derives its allocation goal from it.
 */
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
/* Runtime-writable (0644) switch for debug messages, exported to GPL modules */
int dccp_debug;
module_param(dccp_debug, bool, 0644);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif
1041
1042 static int __init dccp_init(void)
1043 {
1044         unsigned long goal;
1045         int ehash_order, bhash_order, i;
1046         int rc = -ENOBUFS;
1047
1048         BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
1049                      FIELD_SIZEOF(struct sk_buff, cb));
1050
1051         dccp_hashinfo.bind_bucket_cachep =
1052                 kmem_cache_create("dccp_bind_bucket",
1053                                   sizeof(struct inet_bind_bucket), 0,
1054                                   SLAB_HWCACHE_ALIGN, NULL);
1055         if (!dccp_hashinfo.bind_bucket_cachep)
1056                 goto out;
1057
1058         /*
1059          * Size and allocate the main established and bind bucket
1060          * hash tables.
1061          *
1062          * The methodology is similar to that of the buffer cache.
1063          */
1064         if (num_physpages >= (128 * 1024))
1065                 goal = num_physpages >> (21 - PAGE_SHIFT);
1066         else
1067                 goal = num_physpages >> (23 - PAGE_SHIFT);
1068
1069         if (thash_entries)
1070                 goal = (thash_entries *
1071                         sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1072         for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1073                 ;
1074         do {
1075                 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
1076                                         sizeof(struct inet_ehash_bucket);
1077                 while (dccp_hashinfo.ehash_size &
1078                        (dccp_hashinfo.ehash_size - 1))
1079                         dccp_hashinfo.ehash_size--;
1080                 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1081                         __get_free_pages(GFP_ATOMIC, ehash_order);
1082         } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1083
1084         if (!dccp_hashinfo.ehash) {
1085                 DCCP_CRIT("Failed to allocate DCCP established hash table");
1086                 goto out_free_bind_bucket_cachep;
1087         }
1088
1089         for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
1090                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
1091                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
1092         }
1093
1094         if (inet_ehash_locks_alloc(&dccp_hashinfo))
1095                         goto out_free_dccp_ehash;
1096
1097         bhash_order = ehash_order;
1098
1099         do {
1100                 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1101                                         sizeof(struct inet_bind_hashbucket);
1102                 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1103                     bhash_order > 0)
1104                         continue;
1105                 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1106                         __get_free_pages(GFP_ATOMIC, bhash_order);
1107         } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1108
1109         if (!dccp_hashinfo.bhash) {
1110                 DCCP_CRIT("Failed to allocate DCCP bind hash table");
1111                 goto out_free_dccp_locks;
1112         }
1113
1114         for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1115                 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1116                 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1117         }
1118
1119         rc = dccp_mib_init();
1120         if (rc)
1121                 goto out_free_dccp_bhash;
1122
1123         rc = dccp_ackvec_init();
1124         if (rc)
1125                 goto out_free_dccp_mib;
1126
1127         rc = dccp_sysctl_init();
1128         if (rc)
1129                 goto out_ackvec_exit;
1130
1131         dccp_timestamping_init();
1132 out:
1133         return rc;
1134 out_ackvec_exit:
1135         dccp_ackvec_exit();
1136 out_free_dccp_mib:
1137         dccp_mib_exit();
1138 out_free_dccp_bhash:
1139         free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1140         dccp_hashinfo.bhash = NULL;
1141 out_free_dccp_locks:
1142         inet_ehash_locks_free(&dccp_hashinfo);
1143 out_free_dccp_ehash:
1144         free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1145         dccp_hashinfo.ehash = NULL;
1146 out_free_bind_bucket_cachep:
1147         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1148         dccp_hashinfo.bind_bucket_cachep = NULL;
1149         goto out;
1150 }
1151
1152 static void __exit dccp_fini(void)
1153 {
1154         dccp_mib_exit();
1155         free_pages((unsigned long)dccp_hashinfo.bhash,
1156                    get_order(dccp_hashinfo.bhash_size *
1157                              sizeof(struct inet_bind_hashbucket)));
1158         free_pages((unsigned long)dccp_hashinfo.ehash,
1159                    get_order(dccp_hashinfo.ehash_size *
1160                              sizeof(struct inet_ehash_bucket)));
1161         inet_ehash_locks_free(&dccp_hashinfo);
1162         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1163         dccp_ackvec_exit();
1164         dccp_sysctl_exit();
1165 }
1166
/* Module entry and exit points. */
module_init(dccp_init);
module_exit(dccp_fini);

/* Module metadata. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");