/*
 *  net/dccp/proto.c
 *
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *  This program is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU General Public License version 2 as
 *  published by the Free Software Foundation.
 */

#include <linux/config.h>
#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/random.h>
#include <net/checksum.h>

#include <net/inet_common.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/sock.h>
#include <net/xfrm.h>

#include <asm/semaphore.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/delay.h>
#include <linux/poll.h>

#include "ccid.h"
#include "dccp.h"

DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

atomic_t dccp_orphan_count = ATOMIC_INIT(0);

static struct net_protocol dccp_protocol = {
        .handler     = dccp_v4_rcv,
        .err_handler = dccp_v4_err,
};

const char *dccp_packet_name(const int type)
{
        static const char *dccp_packet_names[] = {
                [DCCP_PKT_REQUEST]  = "REQUEST",
                [DCCP_PKT_RESPONSE] = "RESPONSE",
                [DCCP_PKT_DATA]     = "DATA",
                [DCCP_PKT_ACK]      = "ACK",
                [DCCP_PKT_DATAACK]  = "DATAACK",
                [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
                [DCCP_PKT_CLOSE]    = "CLOSE",
                [DCCP_PKT_RESET]    = "RESET",
                [DCCP_PKT_SYNC]     = "SYNC",
                [DCCP_PKT_SYNCACK]  = "SYNCACK",
        };

        if (type >= DCCP_NR_PKT_TYPES)
                return "INVALID";
        else
                return dccp_packet_names[type];
}

EXPORT_SYMBOL_GPL(dccp_packet_name);

const char *dccp_state_name(const int state)
{
        static const char *dccp_state_names[] = {
                [DCCP_OPEN]       = "OPEN",
                [DCCP_REQUESTING] = "REQUESTING",
                [DCCP_PARTOPEN]   = "PARTOPEN",
                [DCCP_LISTEN]     = "LISTEN",
                [DCCP_RESPOND]    = "RESPOND",
                [DCCP_CLOSING]    = "CLOSING",
                [DCCP_TIME_WAIT]  = "TIME_WAIT",
                [DCCP_CLOSED]     = "CLOSED",
        };

        if (state >= DCCP_MAX_STATES)
                return "INVALID STATE!";
        else
                return dccp_state_names[state];
}

EXPORT_SYMBOL_GPL(dccp_state_name);

static inline int dccp_listen_start(struct sock *sk)
{
        struct dccp_sock *dp = dccp_sk(sk);

        dp->dccps_role = DCCP_ROLE_LISTEN;
        /*
         * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE)
         * before calling listen()
         */
        if (dccp_service_not_initialized(sk))
                return -EPROTO;
        return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
}
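
/*
 * Illustrative userspace sketch (not part of this file): the -EPROTO
 * above means a server must set its service code before listen(). The
 * descriptor setup, service value and backlog below are hypothetical.
 *
 *	int fd = socket(PF_INET, SOCK_DCCP, IPPROTO_DCCP);
 *	u32 service = 42;
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
 *		   &service, sizeof(service));
 *	listen(fd, 5);
 *
 * Without the setsockopt, listen() fails with EPROTO.
 */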

int dccp_disconnect(struct sock *sk, int flags)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct inet_sock *inet = inet_sk(sk);
        int err = 0;
        const int old_state = sk->sk_state;

        if (old_state != DCCP_CLOSED)
                dccp_set_state(sk, DCCP_CLOSED);

        /* ABORT function of RFC 793 */
        if (old_state == DCCP_LISTEN) {
                inet_csk_listen_stop(sk);
                /* FIXME: do the active reset thing */
        } else if (old_state == DCCP_REQUESTING)
                sk->sk_err = ECONNRESET;

        dccp_clear_xmit_timers(sk);
        __skb_queue_purge(&sk->sk_receive_queue);
        if (sk->sk_send_head != NULL) {
                __kfree_skb(sk->sk_send_head);
                sk->sk_send_head = NULL;
        }

        inet->dport = 0;

        if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
                inet_reset_saddr(sk);

        sk->sk_shutdown = 0;
        sock_reset_flag(sk, SOCK_DONE);

        icsk->icsk_backoff = 0;
        inet_csk_delack_init(sk);
        __sk_dst_reset(sk);

        BUG_TRAP(!inet->num || icsk->icsk_bind_hash);

        sk->sk_error_report(sk);
        return err;
}

/*
 * Wait for a DCCP event.
 *
 * Note that we don't need to lock the socket, as the upper poll layers
 * take care of normal races (between the test and the event) and we don't
 * go look at any of the socket buffers directly.
 */
static unsigned int dccp_poll(struct file *file, struct socket *sock,
                              poll_table *wait)
{
        unsigned int mask;
        struct sock *sk = sock->sk;

        poll_wait(file, sk->sk_sleep, wait);
        if (sk->sk_state == DCCP_LISTEN)
                return inet_csk_listen_poll(sk);

        /* Socket is not locked. We are protected from async events
         * by poll logic and correct handling of state changes
         * made by other threads is impossible in any case.
         */

        mask = 0;
        if (sk->sk_err)
                mask = POLLERR;

        if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
                mask |= POLLHUP;
        if (sk->sk_shutdown & RCV_SHUTDOWN)
                mask |= POLLIN | POLLRDNORM;

        /* Connected? */
        if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
                if (atomic_read(&sk->sk_rmem_alloc) > 0)
                        mask |= POLLIN | POLLRDNORM;

                if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
                        if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
                                mask |= POLLOUT | POLLWRNORM;
                        } else {  /* send SIGIO later */
                                set_bit(SOCK_ASYNC_NOSPACE,
                                        &sk->sk_socket->flags);
                                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

                                /* Race breaker. If space is freed after
                                 * wspace test but before the flags are set,
                                 * IO signal will be lost.
                                 */
                                if (sk_stream_wspace(sk) >=
                                    sk_stream_min_wspace(sk))
                                        mask |= POLLOUT | POLLWRNORM;
                        }
                }
        }
        return mask;
}
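
/*
 * Illustrative sketch (not part of this file): the mask computed above is
 * what a userspace poll() on a DCCP socket observes; dccp_fd below is a
 * hypothetical connected descriptor.
 *
 *	struct pollfd pfd = { .fd = dccp_fd, .events = POLLIN | POLLOUT };
 *
 *	if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN))
 *		recv(dccp_fd, buf, sizeof(buf), 0);
 *
 * POLLIN is reported when data is queued (sk_rmem_alloc > 0) or the
 * receive side has been shut down.
 */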

int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
        dccp_pr_debug("entry\n");
        return -ENOIOCTLCMD;
}

static int dccp_setsockopt_service(struct sock *sk, const u32 service,
                                   char __user *optval, int optlen)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct dccp_service_list *sl = NULL;

        if (service == DCCP_SERVICE_INVALID_VALUE ||
            optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
                return -EINVAL;

        if (optlen > sizeof(service)) {
                sl = kmalloc(optlen, GFP_KERNEL);
                if (sl == NULL)
                        return -ENOMEM;

                sl->dccpsl_nr = optlen / sizeof(u32) - 1;
                if (copy_from_user(sl->dccpsl_list,
                                   optval + sizeof(service),
                                   optlen - sizeof(service)) ||
                    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
                        kfree(sl);
                        return -EFAULT;
                }
        }

        lock_sock(sk);
        dp->dccps_service = service;

        kfree(dp->dccps_service_list);

        dp->dccps_service_list = sl;
        release_sock(sk);
        return 0;
}
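
/*
 * Layout sketch (my reading of the code above, illustrative only): optval
 * for DCCP_SOCKOPT_SERVICE is an array of u32 values, the first being the
 * primary service code and any remaining ones forming dccps_service_list:
 *
 *	u32 services[3] = { 42, 43, 44 };	(values hypothetical)
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
 *		   services, sizeof(services));
 *
 * Here optlen is 12, so dccpsl_nr ends up as 12 / sizeof(u32) - 1 == 2.
 */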

int dccp_setsockopt(struct sock *sk, int level, int optname,
                    char __user *optval, int optlen)
{
        struct dccp_sock *dp;
        int err;
        int val;

        if (level != SOL_DCCP)
                return ip_setsockopt(sk, level, optname, optval, optlen);

        if (optlen < sizeof(int))
                return -EINVAL;

        if (get_user(val, (int __user *)optval))
                return -EFAULT;

        if (optname == DCCP_SOCKOPT_SERVICE)
                return dccp_setsockopt_service(sk, val, optval, optlen);

        lock_sock(sk);
        dp = dccp_sk(sk);
        err = 0;

        switch (optname) {
        case DCCP_SOCKOPT_PACKET_SIZE:
                dp->dccps_packet_size = val;
                break;
        default:
                err = -ENOPROTOOPT;
                break;
        }

        release_sock(sk);
        return err;
}
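
/*
 * Illustrative sketch (not part of this file): the only plain integer
 * option handled above is the packet size hint; the value below is
 * hypothetical.
 *
 *	int psize = 256;
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_PACKET_SIZE,
 *		   &psize, sizeof(psize));
 */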

static int dccp_getsockopt_service(struct sock *sk, int len,
                                   u32 __user *optval,
                                   int __user *optlen)
{
        const struct dccp_sock *dp = dccp_sk(sk);
        const struct dccp_service_list *sl;
        int err = -ENOENT, slen = 0, total_len = sizeof(u32);

        lock_sock(sk);
        if (dccp_service_not_initialized(sk))
                goto out;

        if ((sl = dp->dccps_service_list) != NULL) {
                slen = sl->dccpsl_nr * sizeof(u32);
                total_len += slen;
        }

        err = -EINVAL;
        if (total_len > len)
                goto out;

        err = 0;
        if (put_user(total_len, optlen) ||
            put_user(dp->dccps_service, optval) ||
            (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
                err = -EFAULT;
out:
        release_sock(sk);
        return err;
}

int dccp_getsockopt(struct sock *sk, int level, int optname,
                    char __user *optval, int __user *optlen)
{
        struct dccp_sock *dp;
        int val, len;

        if (level != SOL_DCCP)
                return ip_getsockopt(sk, level, optname, optval, optlen);

        if (get_user(len, optlen))
                return -EFAULT;

        if (len < sizeof(int))
                return -EINVAL;

        dp = dccp_sk(sk);

        switch (optname) {
        case DCCP_SOCKOPT_PACKET_SIZE:
                val = dp->dccps_packet_size;
                len = sizeof(dp->dccps_packet_size);
                break;
        case DCCP_SOCKOPT_SERVICE:
                return dccp_getsockopt_service(sk, len,
                                               (u32 __user *)optval, optlen);
        case 128 ... 191:
                return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
                                             len, (u32 __user *)optval, optlen);
        case 192 ... 255:
                return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
                                             len, (u32 __user *)optval, optlen);
        default:
                return -ENOPROTOOPT;
        }

        if (put_user(len, optlen) || copy_to_user(optval, &val, len))
                return -EFAULT;

        return 0;
}
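
/*
 * Illustrative sketch (not part of this file): reading the service set
 * back mirrors the setsockopt layout - one u32 for the primary service
 * code followed by the optional list:
 *
 *	u32 buf[DCCP_SERVICE_LIST_MAX_LEN + 1];
 *	socklen_t len = sizeof(buf);
 *
 *	if (getsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE, buf, &len) == 0)
 *		use(buf[0]);	(dccps_service; buf[1..] is the list)
 *
 * Option numbers 128-191 and 192-255 are passed through to the RX and TX
 * halves of the CCID in use, respectively.
 */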

int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len)
{
        const struct dccp_sock *dp = dccp_sk(sk);
        const int flags = msg->msg_flags;
        const int noblock = flags & MSG_DONTWAIT;
        struct sk_buff *skb;
        int rc, size;
        long timeo;

        if (len > dp->dccps_mss_cache)
                return -EMSGSIZE;

        lock_sock(sk);
        timeo = sock_sndtimeo(sk, noblock);

        /*
         * We have to use sk_stream_wait_connect here to set sk_write_pending,
         * so that the trick in dccp_rcv_request_sent_state_process works.
         */
        /* Wait for a connection to finish. */
        if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
                if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
                        goto out_release;

        size = sk->sk_prot->max_header + len;
        release_sock(sk);
        skb = sock_alloc_send_skb(sk, size, noblock, &rc);
        lock_sock(sk);
        if (skb == NULL)
                goto out_release;

        skb_reserve(skb, sk->sk_prot->max_header);
        rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
        if (rc != 0)
                goto out_discard;

        rc = dccp_write_xmit(sk, skb, &timeo);
        /*
         * XXX we don't use sk_write_queue, so just discard the packet.
         * Current plan however is to _use_ sk_write_queue with
         * an algorithm similar to tcp_sendmsg, where the main difference
         * is that in DCCP we have to respect packet boundaries, so
         * no coalescing of skbs.
         *
         * This bug was _quickly_ found & fixed by just looking at an OSTRA
         * generated callgraph 8) -acme
         */
out_release:
        release_sock(sk);
        return rc ? : len;
out_discard:
        kfree_skb(skb);
        goto out_release;
}
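
/*
 * Illustrative sketch (not part of this file): unlike TCP, each send()
 * maps to exactly one DCCP packet, and oversized writes are refused up
 * front rather than segmented; the buffer size below is hypothetical.
 *
 *	char buf[65000];	(assume this exceeds dccps_mss_cache)
 *
 *	if (send(fd, buf, sizeof(buf), 0) < 0 && errno == EMSGSIZE)
 *		... the application must split the write itself ...
 */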

int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len, int nonblock, int flags, int *addr_len)
{
        const struct dccp_hdr *dh;
        long timeo;

        lock_sock(sk);

        if (sk->sk_state == DCCP_LISTEN) {
                len = -ENOTCONN;
                goto out;
        }

        timeo = sock_rcvtimeo(sk, nonblock);

        do {
                struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

                if (skb == NULL)
                        goto verify_sock_status;

                dh = dccp_hdr(skb);

                if (dh->dccph_type == DCCP_PKT_DATA ||
                    dh->dccph_type == DCCP_PKT_DATAACK)
                        goto found_ok_skb;

                if (dh->dccph_type == DCCP_PKT_RESET ||
                    dh->dccph_type == DCCP_PKT_CLOSE) {
                        dccp_pr_debug("found fin ok!\n");
                        len = 0;
                        goto found_fin_ok;
                }
                dccp_pr_debug("packet_type=%s\n",
                              dccp_packet_name(dh->dccph_type));
                sk_eat_skb(sk, skb);
verify_sock_status:
                if (sock_flag(sk, SOCK_DONE)) {
                        len = 0;
                        break;
                }

                if (sk->sk_err) {
                        len = sock_error(sk);
                        break;
                }

                if (sk->sk_shutdown & RCV_SHUTDOWN) {
                        len = 0;
                        break;
                }

                if (sk->sk_state == DCCP_CLOSED) {
                        if (!sock_flag(sk, SOCK_DONE)) {
                                /* This occurs when user tries to read
                                 * from a never connected socket.
                                 */
                                len = -ENOTCONN;
                                break;
                        }
                        len = 0;
                        break;
                }

                if (!timeo) {
                        len = -EAGAIN;
                        break;
                }

                if (signal_pending(current)) {
                        len = sock_intr_errno(timeo);
                        break;
                }

                sk_wait_data(sk, &timeo);
                continue;
        found_ok_skb:
                if (len > skb->len)
                        len = skb->len;
                else if (len < skb->len)
                        msg->msg_flags |= MSG_TRUNC;

                if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
                        /* Exception. Bailout! */
                        len = -EFAULT;
                        break;
                }
        found_fin_ok:
                if (!(flags & MSG_PEEK))
                        sk_eat_skb(sk, skb);
                break;
        } while (1);
out:
        release_sock(sk);
        return len;
}
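
/*
 * Illustrative sketch (not part of this file): reads are packet-oriented
 * as well - a short read consumes the whole datagram and reports the
 * truncation via msg_flags:
 *
 *	char small[16];
 *	struct iovec iov = { .iov_base = small, .iov_len = sizeof(small) };
 *	struct msghdr m = { .msg_iov = &iov, .msg_iovlen = 1 };
 *
 *	recvmsg(fd, &m, 0);
 *
 * If the queued packet carried more than 16 bytes, the tail is dropped
 * and MSG_TRUNC is set in m.msg_flags.
 */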

static int inet_dccp_listen(struct socket *sock, int backlog)
{
        struct sock *sk = sock->sk;
        unsigned char old_state;
        int err;

        lock_sock(sk);

        err = -EINVAL;
        if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
                goto out;

        old_state = sk->sk_state;
        if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
                goto out;

        /* Really, if the socket is already in listen state
         * we can only allow the backlog to be adjusted.
         */
        if (old_state != DCCP_LISTEN) {
                /*
                 * FIXME: here it probably should be sk->sk_prot->listen_start
                 * see tcp_listen_start
                 */
                err = dccp_listen_start(sk);
                if (err)
                        goto out;
        }
        sk->sk_max_ack_backlog = backlog;
        err = 0;

out:
        release_sock(sk);
        return err;
}

static const unsigned char dccp_new_state[] = {
        /* current state:   new state:      action: */
        [0]               = DCCP_CLOSED,
        [DCCP_OPEN]       = DCCP_CLOSING | DCCP_ACTION_FIN,
        [DCCP_REQUESTING] = DCCP_CLOSED,
        [DCCP_PARTOPEN]   = DCCP_CLOSING | DCCP_ACTION_FIN,
        [DCCP_LISTEN]     = DCCP_CLOSED,
        [DCCP_RESPOND]    = DCCP_CLOSED,
        [DCCP_CLOSING]    = DCCP_CLOSED,
        [DCCP_TIME_WAIT]  = DCCP_CLOSED,
        [DCCP_CLOSED]     = DCCP_CLOSED,
};

static int dccp_close_state(struct sock *sk)
{
        const int next = dccp_new_state[sk->sk_state];
        const int ns = next & DCCP_STATE_MASK;

        if (ns != sk->sk_state)
                dccp_set_state(sk, ns);

        return next & DCCP_ACTION_FIN;
}
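
/*
 * Illustrative decode (my reading of the table above): each entry packs
 * the successor state into DCCP_STATE_MASK plus an optional "send a
 * closing packet" bit. For a socket in DCCP_OPEN:
 *
 *	next = DCCP_CLOSING | DCCP_ACTION_FIN;
 *	ns   = next & DCCP_STATE_MASK;		(DCCP_CLOSING)
 *	fin  = next & DCCP_ACTION_FIN;		(non-zero)
 *
 * so dccp_close() below moves the socket to DCCP_CLOSING and, because
 * the action bit is set, calls dccp_send_close().
 */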

void dccp_close(struct sock *sk, long timeout)
{
        struct sk_buff *skb;

        lock_sock(sk);

        sk->sk_shutdown = SHUTDOWN_MASK;

        if (sk->sk_state == DCCP_LISTEN) {
                dccp_set_state(sk, DCCP_CLOSED);

                /* Special case. */
                inet_csk_listen_stop(sk);

                goto adjudge_to_death;
        }

        /*
         * We need to flush the recv. buffs. We do this only on the
         * descriptor close, not protocol-sourced closes, because the
         * reader process may not have drained the data yet!
         */
        /* FIXME: check for unread data */
        while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                __kfree_skb(skb);
        }

        if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
                /* Check zero linger _after_ checking for unread data. */
                sk->sk_prot->disconnect(sk, 0);
        } else if (dccp_close_state(sk)) {
                dccp_send_close(sk, 1);
        }

        sk_stream_wait_close(sk, timeout);

adjudge_to_death:
        /*
         * It is the last release_sock in its life. It will remove backlog.
         */
        release_sock(sk);
        /*
         * Now socket is owned by kernel and we acquire BH lock
         * to finish close. No need to check for user refs.
         */
        local_bh_disable();
        bh_lock_sock(sk);
        BUG_TRAP(!sock_owned_by_user(sk));

        sock_hold(sk);
        sock_orphan(sk);

        /*
         * The last release_sock may have processed the CLOSE or RESET
         * packet moving sock to CLOSED state, if not we have to fire
         * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
         * in draft-ietf-dccp-spec-11. -acme
         */
        if (sk->sk_state == DCCP_CLOSING) {
                /* FIXME: should start at 2 * RTT */
                /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
                inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
                                          inet_csk(sk)->icsk_rto,
                                          DCCP_RTO_MAX);
#if 0
                /* Yeah, we should use sk->sk_prot->orphan_count, etc */
                dccp_set_state(sk, DCCP_CLOSED);
#endif
        }

        atomic_inc(sk->sk_prot->orphan_count);
        if (sk->sk_state == DCCP_CLOSED)
                inet_csk_destroy_sock(sk);

        /* Otherwise, socket is reprieved until protocol close. */

        bh_unlock_sock(sk);
        local_bh_enable();
        sock_put(sk);
}

void dccp_shutdown(struct sock *sk, int how)
{
        dccp_pr_debug("entry\n");
}

static struct proto_ops inet_dccp_ops = {
        .family      = PF_INET,
        .owner       = THIS_MODULE,
        .release     = inet_release,
        .bind        = inet_bind,
        .connect     = inet_stream_connect,
        .socketpair  = sock_no_socketpair,
        .accept      = inet_accept,
        .getname     = inet_getname,
        /* FIXME: work on tcp_poll to rename it to inet_csk_poll */
        .poll        = dccp_poll,
        .ioctl       = inet_ioctl,
        /* FIXME: work on inet_listen to rename it to sock_common_listen */
        .listen      = inet_dccp_listen,
        .shutdown    = inet_shutdown,
        .setsockopt  = sock_common_setsockopt,
        .getsockopt  = sock_common_getsockopt,
        .sendmsg     = inet_sendmsg,
        .recvmsg     = sock_common_recvmsg,
        .mmap        = sock_no_mmap,
        .sendpage    = sock_no_sendpage,
};

extern struct net_proto_family inet_family_ops;

static struct inet_protosw dccp_v4_protosw = {
        .type       = SOCK_DCCP,
        .protocol   = IPPROTO_DCCP,
        .prot       = &dccp_v4_prot,
        .ops        = &inet_dccp_ops,
        .capability = -1,
        .no_check   = 0,
        .flags      = 0,
};

/*
 * This is the global socket data structure used for responding to
 * the Out-of-the-blue (OOTB) packets. A control sock will be created
 * for this socket at initialization time.
 */
struct socket *dccp_ctl_socket;

static char dccp_ctl_socket_err_msg[] __initdata =
        KERN_ERR "DCCP: Failed to create the control socket.\n";

static int __init dccp_ctl_sock_init(void)
{
        int rc = sock_create_kern(PF_INET, SOCK_DCCP, IPPROTO_DCCP,
                                  &dccp_ctl_socket);
        if (rc < 0)
                printk(dccp_ctl_socket_err_msg);
        else {
                dccp_ctl_socket->sk->sk_allocation = GFP_ATOMIC;
                inet_sk(dccp_ctl_socket->sk)->uc_ttl = -1;

                /* Unhash it so that IP input processing does not even
                 * see it, we do not wish this socket to see incoming
                 * packets.
                 */
                dccp_ctl_socket->sk->sk_prot->unhash(dccp_ctl_socket->sk);
        }

        return rc;
}

#ifdef CONFIG_IP_DCCP_UNLOAD_HACK
void dccp_ctl_sock_exit(void)
{
        if (dccp_ctl_socket != NULL) {
                sock_release(dccp_ctl_socket);
                dccp_ctl_socket = NULL;
        }
}

EXPORT_SYMBOL_GPL(dccp_ctl_sock_exit);
#endif

static int __init init_dccp_v4_mibs(void)
{
        int rc = -ENOMEM;

        dccp_statistics[0] = alloc_percpu(struct dccp_mib);
        if (dccp_statistics[0] == NULL)
                goto out;

        dccp_statistics[1] = alloc_percpu(struct dccp_mib);
        if (dccp_statistics[1] == NULL)
                goto out_free_one;

        rc = 0;
out:
        return rc;
out_free_one:
        free_percpu(dccp_statistics[0]);
        dccp_statistics[0] = NULL;
        goto out;
}
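
/*
 * Illustrative note (my reading, not part of this file): the two percpu
 * copies follow the usual Linux SNMP convention - one is updated from
 * BH/softirq context and the other from process context, so counters can
 * be bumped lock-free from either, e.g.:
 *
 *	DCCP_INC_STATS_BH(DCCP_MIB_INERRS);
 *
 * from packet receive paths running in softirq.
 */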

static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
int dccp_debug;
module_param(dccp_debug, int, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
#endif

static int __init dccp_init(void)
{
        unsigned long goal;
        int ehash_order, bhash_order, i;
        int rc = proto_register(&dccp_v4_prot, 1);

        if (rc)
                goto out;

        dccp_hashinfo.bind_bucket_cachep =
                kmem_cache_create("dccp_bind_bucket",
                                  sizeof(struct inet_bind_bucket), 0,
                                  SLAB_HWCACHE_ALIGN, NULL, NULL);
        if (!dccp_hashinfo.bind_bucket_cachep)
                goto out_proto_unregister;

        /*
         * Size and allocate the main established and bind bucket
         * hash tables.
         *
         * The methodology is similar to that of the buffer cache.
         */
        if (num_physpages >= (128 * 1024))
                goal = num_physpages >> (21 - PAGE_SHIFT);
        else
                goal = num_physpages >> (23 - PAGE_SHIFT);

        if (thash_entries)
                goal = (thash_entries *
                        sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
        for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
                ;
        do {
                dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
                                        sizeof(struct inet_ehash_bucket);
                dccp_hashinfo.ehash_size >>= 1;
                while (dccp_hashinfo.ehash_size &
                       (dccp_hashinfo.ehash_size - 1))
                        dccp_hashinfo.ehash_size--;
                dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
                        __get_free_pages(GFP_ATOMIC, ehash_order);
        } while (!dccp_hashinfo.ehash && --ehash_order > 0);
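
        /*
         * Worked example (illustrative): with 4KB pages (PAGE_SHIFT == 12)
         * and num_physpages == 128 * 1024 (512MB), goal is
         * 131072 >> (21 - 12) == 256 pages, so ehash_order starts at 8.
         * ehash_size is then however many buckets fit in 2^8 pages, halved
         * once and rounded down to a power of two; the loop below
         * initializes both halves of the table, the second of which holds
         * TIME_WAIT sockets, as in TCP.
         */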

        if (!dccp_hashinfo.ehash) {
                printk(KERN_CRIT "Failed to allocate DCCP "
                                 "established hash table\n");
                goto out_free_bind_bucket_cachep;
        }

        for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
                rwlock_init(&dccp_hashinfo.ehash[i].lock);
                INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
        }

        bhash_order = ehash_order;

        do {
                dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
                                        sizeof(struct inet_bind_hashbucket);
                if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
                    bhash_order > 0)
                        continue;
                dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
                        __get_free_pages(GFP_ATOMIC, bhash_order);
        } while (!dccp_hashinfo.bhash && --bhash_order >= 0);

        if (!dccp_hashinfo.bhash) {
                printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
                goto out_free_dccp_ehash;
        }

        for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
                spin_lock_init(&dccp_hashinfo.bhash[i].lock);
                INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
        }

        if (init_dccp_v4_mibs())
                goto out_free_dccp_bhash;

        rc = -EAGAIN;
        if (inet_add_protocol(&dccp_protocol, IPPROTO_DCCP))
                goto out_free_dccp_v4_mibs;

        inet_register_protosw(&dccp_v4_protosw);

        rc = dccp_ctl_sock_init();
        if (rc)
                goto out_unregister_protosw;
out:
        return rc;
out_unregister_protosw:
        inet_unregister_protosw(&dccp_v4_protosw);
        inet_del_protocol(&dccp_protocol, IPPROTO_DCCP);
out_free_dccp_v4_mibs:
        free_percpu(dccp_statistics[0]);
        free_percpu(dccp_statistics[1]);
        dccp_statistics[0] = dccp_statistics[1] = NULL;
out_free_dccp_bhash:
        free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
        dccp_hashinfo.bhash = NULL;
out_free_dccp_ehash:
        free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
        dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        dccp_hashinfo.bind_bucket_cachep = NULL;
out_proto_unregister:
        proto_unregister(&dccp_v4_prot);
        goto out;
}

static const char dccp_del_proto_err_msg[] __exitdata =
        KERN_ERR "can't remove dccp net_protocol\n";

static void __exit dccp_fini(void)
{
        inet_unregister_protosw(&dccp_v4_protosw);

        if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0)
                printk(dccp_del_proto_err_msg);

        free_percpu(dccp_statistics[0]);
        free_percpu(dccp_statistics[1]);
        free_pages((unsigned long)dccp_hashinfo.bhash,
                   get_order(dccp_hashinfo.bhash_size *
                             sizeof(struct inet_bind_hashbucket)));
        free_pages((unsigned long)dccp_hashinfo.ehash,
                   get_order(dccp_hashinfo.ehash_size *
                             sizeof(struct inet_ehash_bucket)));
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        proto_unregister(&dccp_v4_prot);
}

module_init(dccp_init);
module_exit(dccp_fini);

/*
 * __stringify doesn't like enums, so use SOCK_DCCP (6) and IPPROTO_DCCP (33)
 * values directly. Also cover the case where the protocol is not specified,
 * i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP
 */
MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-33-type-6");
MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-0-type-6");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");