/*
 *  net/dccp/proto.c
 *
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *  This program is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU General Public License version 2 as
 *  published by the Free Software Foundation.
 */

#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/random.h>
#include <net/checksum.h>

#include <net/inet_sock.h>
#include <net/sock.h>
#include <net/xfrm.h>

#include <asm/ioctls.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/delay.h>
#include <linux/poll.h>

#include "ccid.h"
#include "dccp.h"
#include "feat.h"

DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

atomic_t dccp_orphan_count = ATOMIC_INIT(0);

EXPORT_SYMBOL_GPL(dccp_orphan_count);

struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
        .lhash_lock     = RW_LOCK_UNLOCKED,
        .lhash_users    = ATOMIC_INIT(0),
        .lhash_wait     = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
};

EXPORT_SYMBOL_GPL(dccp_hashinfo);

/* the maximum queue length for tx in packets. 0 is no limit */
int sysctl_dccp_tx_qlen __read_mostly = 5;

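/*
 * dccp_set_state updates sk->sk_state and keeps the SNMP counters
 * (CURRESTAB, ESTABRESETS) consistent with the transition. On entering
 * DCCP_CLOSED the socket is also unhashed and its bound port released,
 * before sk_state itself is rewritten.
 */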
void dccp_set_state(struct sock *sk, const int state)
{
        const int oldstate = sk->sk_state;

        dccp_pr_debug("%s(%p) %s  -->  %s\n", dccp_role(sk), sk,
                      dccp_state_name(oldstate), dccp_state_name(state));
        WARN_ON(state == oldstate);

        switch (state) {
        case DCCP_OPEN:
                if (oldstate != DCCP_OPEN)
                        DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
                break;

        case DCCP_CLOSED:
                if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
                    oldstate == DCCP_CLOSING)
                        DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

                sk->sk_prot->unhash(sk);
                if (inet_csk(sk)->icsk_bind_hash != NULL &&
                    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
                        inet_put_port(sk);
                /* fall through */
        default:
                if (oldstate == DCCP_OPEN)
                        DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
        }

        /*
         * Change state AFTER socket is unhashed to avoid closed
         * socket sitting in hash tables.
         */
        sk->sk_state = state;
}

EXPORT_SYMBOL_GPL(dccp_set_state);

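/*
 * The peer's Close or CloseReq has been queued to the socket and seen
 * by the application (see dccp_recvmsg); answer it here: a Reset with
 * code "Closed" for a Close, or a retransmitted Close for a CloseReq,
 * as per RFC 4340, 8.3.
 */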
static void dccp_finish_passive_close(struct sock *sk)
{
        switch (sk->sk_state) {
        case DCCP_PASSIVE_CLOSE:
                /* Node (client or server) has received Close packet. */
                dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
                dccp_set_state(sk, DCCP_CLOSED);
                break;
        case DCCP_PASSIVE_CLOSEREQ:
                /*
                 * Client received CloseReq. We set the `active' flag so that
                 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
                 */
                dccp_send_close(sk, 1);
                dccp_set_state(sk, DCCP_CLOSING);
        }
}

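/*
 * dccp_done is the terminal state change: stop all retransmission
 * timers, mark both directions shut down, and either wake up a
 * still-attached user process or destroy the orphaned socket.
 */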
void dccp_done(struct sock *sk)
{
        dccp_set_state(sk, DCCP_CLOSED);
        dccp_clear_xmit_timers(sk);

        sk->sk_shutdown = SHUTDOWN_MASK;

        if (!sock_flag(sk, SOCK_DEAD))
                sk->sk_state_change(sk);
        else
                inet_csk_destroy_sock(sk);
}

EXPORT_SYMBOL_GPL(dccp_done);

const char *dccp_packet_name(const int type)
{
        static const char *dccp_packet_names[] = {
                [DCCP_PKT_REQUEST]  = "REQUEST",
                [DCCP_PKT_RESPONSE] = "RESPONSE",
                [DCCP_PKT_DATA]     = "DATA",
                [DCCP_PKT_ACK]      = "ACK",
                [DCCP_PKT_DATAACK]  = "DATAACK",
                [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
                [DCCP_PKT_CLOSE]    = "CLOSE",
                [DCCP_PKT_RESET]    = "RESET",
                [DCCP_PKT_SYNC]     = "SYNC",
                [DCCP_PKT_SYNCACK]  = "SYNCACK",
        };

        if (type >= DCCP_NR_PKT_TYPES)
                return "INVALID";
        else
                return dccp_packet_names[type];
}

EXPORT_SYMBOL_GPL(dccp_packet_name);

const char *dccp_state_name(const int state)
{
        static const char *dccp_state_names[] = {
        [DCCP_OPEN]             = "OPEN",
        [DCCP_REQUESTING]       = "REQUESTING",
        [DCCP_PARTOPEN]         = "PARTOPEN",
        [DCCP_LISTEN]           = "LISTEN",
        [DCCP_RESPOND]          = "RESPOND",
        [DCCP_CLOSING]          = "CLOSING",
        [DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
        [DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
        [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
        [DCCP_TIME_WAIT]        = "TIME_WAIT",
        [DCCP_CLOSED]           = "CLOSED",
        };

        if (state >= DCCP_MAX_STATES)
                return "INVALID STATE!";
        else
                return dccp_state_names[state];
}

EXPORT_SYMBOL_GPL(dccp_state_name);

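/*
 * Initialise a fresh DCCP socket: timers, default MSS, role and
 * feature-negotiation state. For sockets other than the per-protocol
 * control socket this also allocates the RX/TX CCID blocks and, if
 * negotiated, the Ack Vector for the receiver half-connection.
 */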
int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct dccp_minisock *dmsk = dccp_msk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);

        dccp_minisock_init(&dp->dccps_minisock);

        icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
        icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
        sk->sk_state            = DCCP_CLOSED;
        sk->sk_write_space      = dccp_write_space;
        icsk->icsk_sync_mss     = dccp_sync_mss;
        dp->dccps_mss_cache     = 536;
        dp->dccps_rate_last     = jiffies;
        dp->dccps_role          = DCCP_ROLE_UNDEFINED;
        dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
        dp->dccps_l_ack_ratio   = dp->dccps_r_ack_ratio = 1;

        dccp_init_xmit_timers(sk);

        /*
         * FIXME: We're hardcoding the CCID, and doing this at this point makes
         * the listening (master) sock get CCID control blocks, which is not
         * necessary, but for now, to not mess with the test userspace apps,
         * lets leave it here, later the real solution is to do this in a
         * setsockopt(CCIDs-I-want/accept). -acme
         */
        if (likely(ctl_sock_initialized)) {
                int rc = dccp_feat_init(dmsk);

                if (rc)
                        return rc;

                if (dmsk->dccpms_send_ack_vector) {
                        dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
                        if (dp->dccps_hc_rx_ackvec == NULL)
                                return -ENOMEM;
                }
                dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
                                                      sk, GFP_KERNEL);
                dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
                                                      sk, GFP_KERNEL);
                if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
                             dp->dccps_hc_tx_ccid == NULL)) {
                        ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
                        ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
                        if (dmsk->dccpms_send_ack_vector) {
                                dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
                                dp->dccps_hc_rx_ackvec = NULL;
                        }
                        dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
                        return -ENOMEM;
                }
        } else {
                /* control socket doesn't need feat nego */
                INIT_LIST_HEAD(&dmsk->dccpms_pending);
                INIT_LIST_HEAD(&dmsk->dccpms_conf);
        }

        return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);

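/*
 * Release everything dccp_init_sock() and the connection set up: the
 * pending retransmit skb, the bind bucket, the service list, the Ack
 * Vector and both CCID blocks.
 */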
void dccp_destroy_sock(struct sock *sk)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct dccp_minisock *dmsk = dccp_msk(sk);

        /*
         * DCCP doesn't use sk_write_queue, just sk_send_head
         * for retransmissions
         */
        if (sk->sk_send_head != NULL) {
                kfree_skb(sk->sk_send_head);
                sk->sk_send_head = NULL;
        }

        /* Clean up a referenced DCCP bind bucket. */
        if (inet_csk(sk)->icsk_bind_hash != NULL)
                inet_put_port(sk);

        kfree(dp->dccps_service_list);
        dp->dccps_service_list = NULL;

        if (dmsk->dccpms_send_ack_vector) {
                dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
                dp->dccps_hc_rx_ackvec = NULL;
        }
        ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
        ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
        dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;

        /* clean up feature negotiation state */
        dccp_feat_clean(dmsk);
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);

static inline int dccp_listen_start(struct sock *sk, int backlog)
{
        struct dccp_sock *dp = dccp_sk(sk);

        dp->dccps_role = DCCP_ROLE_LISTEN;
        return inet_csk_listen_start(sk, backlog);
}

static inline int dccp_need_reset(int state)
{
        return state != DCCP_CLOSED && state != DCCP_LISTEN &&
               state != DCCP_REQUESTING;
}

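/*
 * Abort any connection state and return the socket to DCCP_CLOSED so
 * that it can be reused; receive and transmit queues are purged and
 * the socket's addressing state is reset.
 */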
int dccp_disconnect(struct sock *sk, int flags)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct inet_sock *inet = inet_sk(sk);
        int err = 0;
        const int old_state = sk->sk_state;

        if (old_state != DCCP_CLOSED)
                dccp_set_state(sk, DCCP_CLOSED);

        /*
         * This corresponds to the ABORT function of RFC793, sec. 3.8
         * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
         */
        if (old_state == DCCP_LISTEN) {
                inet_csk_listen_stop(sk);
        } else if (dccp_need_reset(old_state)) {
                dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
                sk->sk_err = ECONNRESET;
        } else if (old_state == DCCP_REQUESTING)
                sk->sk_err = ECONNRESET;

        dccp_clear_xmit_timers(sk);
        __skb_queue_purge(&sk->sk_receive_queue);
        if (sk->sk_send_head != NULL) {
                __kfree_skb(sk->sk_send_head);
                sk->sk_send_head = NULL;
        }

        inet->dport = 0;

        if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
                inet_reset_saddr(sk);

        sk->sk_shutdown = 0;
        sock_reset_flag(sk, SOCK_DONE);

        icsk->icsk_backoff = 0;
        inet_csk_delack_init(sk);
        __sk_dst_reset(sk);

        WARN_ON(inet->num && !icsk->icsk_bind_hash);

        sk->sk_error_report(sk);
        return err;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);

/*
 * Wait for a DCCP event.
 *
 * Note that we don't need to lock the socket, as the upper poll layers
 * take care of normal races (between the test and the event) and we don't
 * go look at any of the socket buffers directly.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
                       poll_table *wait)
{
        unsigned int mask;
        struct sock *sk = sock->sk;

        poll_wait(file, sk->sk_sleep, wait);
        if (sk->sk_state == DCCP_LISTEN)
                return inet_csk_listen_poll(sk);

        /*
         * Socket is not locked. We are protected from async events by the
         * poll logic, and correct handling of state changes made by other
         * threads is impossible in any case.
         */

        mask = 0;
        if (sk->sk_err)
                mask = POLLERR;

        if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
                mask |= POLLHUP;
        if (sk->sk_shutdown & RCV_SHUTDOWN)
                mask |= POLLIN | POLLRDNORM | POLLRDHUP;

        /* Connected? */
        if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
                if (atomic_read(&sk->sk_rmem_alloc) > 0)
                        mask |= POLLIN | POLLRDNORM;

                if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
                        if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
                                mask |= POLLOUT | POLLWRNORM;
                        } else {  /* send SIGIO later */
                                set_bit(SOCK_ASYNC_NOSPACE,
                                        &sk->sk_socket->flags);
                                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

                                /*
                                 * Race breaker. If space is freed after
                                 * wspace test but before the flags are set,
                                 * IO signal will be lost.
                                 */
                                if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
                                        mask |= POLLOUT | POLLWRNORM;
                        }
                }
        }
        return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);

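/*
 * Only SIOCINQ is implemented; it reports the length of the packet at
 * the head of the receive queue, since a read will return at most one
 * packet's worth of data.
 */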
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
        int rc = -ENOTCONN;

        lock_sock(sk);

        if (sk->sk_state == DCCP_LISTEN)
                goto out;

        switch (cmd) {
        case SIOCINQ: {
                struct sk_buff *skb;
                unsigned long amount = 0;

                skb = skb_peek(&sk->sk_receive_queue);
                if (skb != NULL) {
                        /*
                         * We will only return the amount of this packet since
                         * that is all that will be read.
                         */
                        amount = skb->len;
                }
                rc = put_user(amount, (int __user *)arg);
        }
                break;
        default:
                rc = -ENOIOCTLCMD;
                break;
        }
out:
        release_sock(sk);
        return rc;
}

EXPORT_SYMBOL_GPL(dccp_ioctl);

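/*
 * Install the service code (and, when optlen carries more than one
 * word, a list of further service codes) that this socket will use.
 * The copies from user space are done before taking the socket lock,
 * since kmalloc() and copy_from_user() may sleep.
 */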
static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
                                   char __user *optval, int optlen)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct dccp_service_list *sl = NULL;

        if (service == DCCP_SERVICE_INVALID_VALUE ||
            optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
                return -EINVAL;

        if (optlen > sizeof(service)) {
                sl = kmalloc(optlen, GFP_KERNEL);
                if (sl == NULL)
                        return -ENOMEM;

                sl->dccpsl_nr = optlen / sizeof(u32) - 1;
                if (copy_from_user(sl->dccpsl_list,
                                   optval + sizeof(service),
                                   optlen - sizeof(service)) ||
                    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
                        kfree(sl);
                        return -EFAULT;
                }
        }

        lock_sock(sk);
        dp->dccps_service = service;

        kfree(dp->dccps_service_list);

        dp->dccps_service_list = sl;
        release_sock(sk);
        return 0;
}

/* byte 1 is feature. the rest is the preference list */
static int dccp_setsockopt_change(struct sock *sk, int type,
                                  struct dccp_so_feat __user *optval)
{
        struct dccp_so_feat opt;
        u8 *val;
        int rc;

        if (copy_from_user(&opt, optval, sizeof(opt)))
                return -EFAULT;
        /*
         * rfc4340: 6.1. Change Options
         */
        if (opt.dccpsf_len < 1)
                return -EINVAL;

        val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
        if (!val)
                return -ENOMEM;

        if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
                rc = -EFAULT;
                goto out_free_val;
        }

        rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
                              val, opt.dccpsf_len, GFP_KERNEL);
        if (rc)
                goto out_free_val;

out:
        return rc;

out_free_val:
        kfree(val);
        goto out;
}

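/*
 * Socket-option handlers. DCCP_SOCKOPT_SERVICE is dispatched before the
 * main switch because it takes a variable-length argument; everything
 * else is a plain int handled under the socket lock.
 */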
static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
                              char __user *optval, int optlen)
{
        struct dccp_sock *dp = dccp_sk(sk);
        int val, err = 0;

        if (optlen < sizeof(int))
                return -EINVAL;

        if (get_user(val, (int __user *)optval))
                return -EFAULT;

        if (optname == DCCP_SOCKOPT_SERVICE)
                return dccp_setsockopt_service(sk, val, optval, optlen);

        lock_sock(sk);
        switch (optname) {
        case DCCP_SOCKOPT_PACKET_SIZE:
                DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
                err = 0;
                break;
        case DCCP_SOCKOPT_CHANGE_L:
                if (optlen != sizeof(struct dccp_so_feat))
                        err = -EINVAL;
                else
                        err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
                                                     (struct dccp_so_feat __user *)
                                                     optval);
                break;
        case DCCP_SOCKOPT_CHANGE_R:
                if (optlen != sizeof(struct dccp_so_feat))
                        err = -EINVAL;
                else
                        err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
                                                     (struct dccp_so_feat __user *)
                                                     optval);
                break;
        case DCCP_SOCKOPT_SERVER_TIMEWAIT:
                if (dp->dccps_role != DCCP_ROLE_SERVER)
                        err = -EOPNOTSUPP;
                else
                        dp->dccps_server_timewait = (val != 0);
                break;
        case DCCP_SOCKOPT_SEND_CSCOV:   /* sender side, RFC 4340, sec. 9.2 */
                if (val < 0 || val > 15)
                        err = -EINVAL;
                else
                        dp->dccps_pcslen = val;
                break;
        case DCCP_SOCKOPT_RECV_CSCOV:   /* receiver side, RFC 4340 sec. 9.2.1 */
                if (val < 0 || val > 15)
                        err = -EINVAL;
                else {
                        dp->dccps_pcrlen = val;
                        /* FIXME: add feature negotiation,
                         * ChangeL(MinimumChecksumCoverage, val) */
                }
                break;
        default:
                err = -ENOPROTOOPT;
                break;
        }

        release_sock(sk);
        return err;
}

int dccp_setsockopt(struct sock *sk, int level, int optname,
                    char __user *optval, int optlen)
{
        if (level != SOL_DCCP)
                return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
                                                             optname, optval,
                                                             optlen);
        return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_setsockopt);

#ifdef CONFIG_COMPAT
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
                           char __user *optval, int optlen)
{
        if (level != SOL_DCCP)
                return inet_csk_compat_setsockopt(sk, level, optname,
                                                  optval, optlen);
        return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif

static int dccp_getsockopt_service(struct sock *sk, int len,
                                   __be32 __user *optval,
                                   int __user *optlen)
{
        const struct dccp_sock *dp = dccp_sk(sk);
        const struct dccp_service_list *sl;
        int err = -ENOENT, slen = 0, total_len = sizeof(u32);

        lock_sock(sk);
        if ((sl = dp->dccps_service_list) != NULL) {
                slen = sl->dccpsl_nr * sizeof(u32);
                total_len += slen;
        }

        err = -EINVAL;
        if (total_len > len)
                goto out;

        err = 0;
        if (put_user(total_len, optlen) ||
            put_user(dp->dccps_service, optval) ||
            (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
                err = -EFAULT;
out:
        release_sock(sk);
        return err;
}

static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
                              char __user *optval, int __user *optlen)
{
        struct dccp_sock *dp;
        int val, len;

        if (get_user(len, optlen))
                return -EFAULT;

        if (len < (int)sizeof(int))
                return -EINVAL;

        dp = dccp_sk(sk);

        switch (optname) {
        case DCCP_SOCKOPT_PACKET_SIZE:
                DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
                return 0;
        case DCCP_SOCKOPT_SERVICE:
                return dccp_getsockopt_service(sk, len,
                                               (__be32 __user *)optval, optlen);
        case DCCP_SOCKOPT_GET_CUR_MPS:
                val = dp->dccps_mss_cache;
                break;
        case DCCP_SOCKOPT_SERVER_TIMEWAIT:
                val = dp->dccps_server_timewait;
                break;
        case DCCP_SOCKOPT_SEND_CSCOV:
                val = dp->dccps_pcslen;
                break;
        case DCCP_SOCKOPT_RECV_CSCOV:
                val = dp->dccps_pcrlen;
                break;
        case 128 ... 191:
                return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
                                             len, (u32 __user *)optval, optlen);
        case 192 ... 255:
                return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
                                             len, (u32 __user *)optval, optlen);
        default:
                return -ENOPROTOOPT;
        }

        len = sizeof(val);
        if (put_user(len, optlen) || copy_to_user(optval, &val, len))
                return -EFAULT;

        return 0;
}

int dccp_getsockopt(struct sock *sk, int level, int optname,
                    char __user *optval, int __user *optlen)
{
        if (level != SOL_DCCP)
                return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
                                                             optname, optval,
                                                             optlen);
        return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_getsockopt);

#ifdef CONFIG_COMPAT
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
                           char __user *optval, int __user *optlen)
{
        if (level != SOL_DCCP)
                return inet_csk_compat_getsockopt(sk, level, optname,
                                                  optval, optlen);
        return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif

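/*
 * dccp_sendmsg queues exactly one packet per call: payloads larger than
 * the current MPS are rejected with -EMSGSIZE rather than fragmented,
 * and the TX queue is bounded by sysctl_dccp_tx_qlen.
 */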
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len)
{
        const struct dccp_sock *dp = dccp_sk(sk);
        const int flags = msg->msg_flags;
        const int noblock = flags & MSG_DONTWAIT;
        struct sk_buff *skb;
        int rc, size;
        long timeo;

        if (len > dp->dccps_mss_cache)
                return -EMSGSIZE;

        lock_sock(sk);

        if (sysctl_dccp_tx_qlen &&
            (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
                rc = -EAGAIN;
                goto out_release;
        }

        timeo = sock_sndtimeo(sk, noblock);

        /*
         * We have to use sk_stream_wait_connect here to set sk_write_pending,
         * so that the trick in dccp_rcv_request_sent_state_process works.
         */
        /* Wait for a connection to finish. */
        if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
                if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
                        goto out_release;

        size = sk->sk_prot->max_header + len;
        release_sock(sk);
        skb = sock_alloc_send_skb(sk, size, noblock, &rc);
        lock_sock(sk);
        if (skb == NULL)
                goto out_release;

        skb_reserve(skb, sk->sk_prot->max_header);
        rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
        if (rc != 0)
                goto out_discard;

        skb_queue_tail(&sk->sk_write_queue, skb);
        dccp_write_xmit(sk, 0);
out_release:
        release_sock(sk);
        return rc ? : len;
out_discard:
        kfree_skb(skb);
        goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);

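/*
 * dccp_recvmsg delivers at most one packet per call (anything beyond
 * the supplied buffer is dropped and flagged MSG_TRUNC) and reacts to
 * in-band Close/CloseReq/Reset packets found on the receive queue.
 */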
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len, int nonblock, int flags, int *addr_len)
{
        const struct dccp_hdr *dh;
        long timeo;

        lock_sock(sk);

        if (sk->sk_state == DCCP_LISTEN) {
                len = -ENOTCONN;
                goto out;
        }

        timeo = sock_rcvtimeo(sk, nonblock);

        do {
                struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

                if (skb == NULL)
                        goto verify_sock_status;

                dh = dccp_hdr(skb);

                switch (dh->dccph_type) {
                case DCCP_PKT_DATA:
                case DCCP_PKT_DATAACK:
                        goto found_ok_skb;

                case DCCP_PKT_CLOSE:
                case DCCP_PKT_CLOSEREQ:
                        if (!(flags & MSG_PEEK))
                                dccp_finish_passive_close(sk);
                        /* fall through */
                case DCCP_PKT_RESET:
                        dccp_pr_debug("found fin (%s) ok!\n",
                                      dccp_packet_name(dh->dccph_type));
                        len = 0;
                        goto found_fin_ok;
                default:
                        dccp_pr_debug("packet_type=%s\n",
                                      dccp_packet_name(dh->dccph_type));
                        sk_eat_skb(sk, skb, 0);
                }
verify_sock_status:
                if (sock_flag(sk, SOCK_DONE)) {
                        len = 0;
                        break;
                }

                if (sk->sk_err) {
                        len = sock_error(sk);
                        break;
                }

                if (sk->sk_shutdown & RCV_SHUTDOWN) {
                        len = 0;
                        break;
                }

                if (sk->sk_state == DCCP_CLOSED) {
                        if (!sock_flag(sk, SOCK_DONE)) {
                                /*
                                 * This occurs when the user tries to read
                                 * from a never-connected socket.
                                 */
                                len = -ENOTCONN;
                                break;
                        }
                        len = 0;
                        break;
                }

                if (!timeo) {
                        len = -EAGAIN;
                        break;
                }

                if (signal_pending(current)) {
                        len = sock_intr_errno(timeo);
                        break;
                }

                sk_wait_data(sk, &timeo);
                continue;
        found_ok_skb:
                if (len > skb->len)
                        len = skb->len;
                else if (len < skb->len)
                        msg->msg_flags |= MSG_TRUNC;

                if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
                        /* Exception. Bailout! */
                        len = -EFAULT;
                        break;
                }
        found_fin_ok:
                if (!(flags & MSG_PEEK))
                        sk_eat_skb(sk, skb, 0);
                break;
        } while (1);
out:
        release_sock(sk);
        return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);

int inet_dccp_listen(struct socket *sock, int backlog)
{
        struct sock *sk = sock->sk;
        unsigned char old_state;
        int err;

        lock_sock(sk);

        err = -EINVAL;
        if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
                goto out;

        old_state = sk->sk_state;
        if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
                goto out;

        /*
         * Really, if the socket is already in listen state
         * we can only allow the backlog to be adjusted.
         */
        if (old_state != DCCP_LISTEN) {
                /*
                 * FIXME: here it probably should be sk->sk_prot->listen_start
                 * see tcp_listen_start
                 */
                err = dccp_listen_start(sk, backlog);
                if (err)
                        goto out;
        }
        sk->sk_max_ack_backlog = backlog;
        err = 0;

out:
        release_sock(sk);
        return err;
}

EXPORT_SYMBOL_GPL(inet_dccp_listen);

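/*
 * Active-close helper for dccp_close(): answer any pending passive
 * close first; otherwise send a Close and move to CLOSING, or to
 * ACTIVE_CLOSEREQ when a server wants the client to hold TIMEWAIT.
 */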
static void dccp_terminate_connection(struct sock *sk)
{
        u8 next_state = DCCP_CLOSED;

        switch (sk->sk_state) {
        case DCCP_PASSIVE_CLOSE:
        case DCCP_PASSIVE_CLOSEREQ:
                dccp_finish_passive_close(sk);
                break;
        case DCCP_PARTOPEN:
                dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
                inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
                /* fall through */
        case DCCP_OPEN:
                dccp_send_close(sk, 1);

                if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
                    !dccp_sk(sk)->dccps_server_timewait)
                        next_state = DCCP_ACTIVE_CLOSEREQ;
                else
                        next_state = DCCP_CLOSING;
                /* fall through */
        default:
                dccp_set_state(sk, next_state);
        }
}

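/*
 * dccp_close implements close(2) for DCCP sockets: unread data aborts
 * the connection with Reset code "Aborted", a zero-linger close maps to
 * disconnect, and everything else performs the graceful termination
 * handshake before the socket is orphaned.
 */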
void dccp_close(struct sock *sk, long timeout)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct sk_buff *skb;
        u32 data_was_unread = 0;
        int state;

        lock_sock(sk);

        sk->sk_shutdown = SHUTDOWN_MASK;

        if (sk->sk_state == DCCP_LISTEN) {
                dccp_set_state(sk, DCCP_CLOSED);

                /* Special case. */
                inet_csk_listen_stop(sk);

                goto adjudge_to_death;
        }

        sk_stop_timer(sk, &dp->dccps_xmit_timer);

        /*
         * We need to flush the recv. buffs. We do this only on the
         * descriptor close, not protocol-sourced closes, because the
         * reader process may not have drained the data yet!
         */
        while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                data_was_unread += skb->len;
                __kfree_skb(skb);
        }

        if (data_was_unread) {
                /* Unread data was tossed, send an appropriate Reset Code */
                DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
                dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
                dccp_set_state(sk, DCCP_CLOSED);
        } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
                /* Check zero linger _after_ checking for unread data. */
                sk->sk_prot->disconnect(sk, 0);
        } else if (sk->sk_state != DCCP_CLOSED) {
                dccp_terminate_connection(sk);
        }

        sk_stream_wait_close(sk, timeout);

adjudge_to_death:
        state = sk->sk_state;
        sock_hold(sk);
        sock_orphan(sk);
        atomic_inc(sk->sk_prot->orphan_count);

        /*
         * It is the last release_sock in its life. It will remove backlog.
         */
        release_sock(sk);
        /*
         * Now socket is owned by kernel and we acquire BH lock
         * to finish close. No need to check for user refs.
         */
        local_bh_disable();
        bh_lock_sock(sk);
        WARN_ON(sock_owned_by_user(sk));

        /* Have we already been destroyed by a softirq or backlog? */
        if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
                goto out;

        if (sk->sk_state == DCCP_CLOSED)
                inet_csk_destroy_sock(sk);

        /* Otherwise, socket is reprieved until protocol close. */

out:
        bh_unlock_sock(sk);
        local_bh_enable();
        sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);

void dccp_shutdown(struct sock *sk, int how)
{
        dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);

static inline int dccp_mib_init(void)
{
        return snmp_mib_init((void **)dccp_statistics, sizeof(struct dccp_mib));
}

static inline void dccp_mib_exit(void)
{
        snmp_mib_free((void **)dccp_statistics);
}

static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
int dccp_debug;
module_param(dccp_debug, bool, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif

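/*
 * Module init: size the established and bind hash tables from available
 * memory (or from the thash_entries parameter), then bring up the MIB
 * statistics, Ack Vector and sysctl subsystems, unwinding in reverse
 * order on failure.
 */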
static int __init dccp_init(void)
{
        unsigned long goal;
        int ehash_order, bhash_order, i;
        int rc = -ENOBUFS;

        BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
                     FIELD_SIZEOF(struct sk_buff, cb));

        dccp_hashinfo.bind_bucket_cachep =
                kmem_cache_create("dccp_bind_bucket",
                                  sizeof(struct inet_bind_bucket), 0,
                                  SLAB_HWCACHE_ALIGN, NULL);
        if (!dccp_hashinfo.bind_bucket_cachep)
                goto out;

        /*
         * Size and allocate the main established and bind bucket
         * hash tables.
         *
         * The methodology is similar to that of the buffer cache.
         */
        if (num_physpages >= (128 * 1024))
                goal = num_physpages >> (21 - PAGE_SHIFT);
        else
                goal = num_physpages >> (23 - PAGE_SHIFT);

        if (thash_entries)
                goal = (thash_entries *
                        sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
        for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
                ;
        do {
                dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
                                        sizeof(struct inet_ehash_bucket);
                while (dccp_hashinfo.ehash_size &
                       (dccp_hashinfo.ehash_size - 1))
                        dccp_hashinfo.ehash_size--;
                dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
                        __get_free_pages(GFP_ATOMIC, ehash_order);
        } while (!dccp_hashinfo.ehash && --ehash_order > 0);

        if (!dccp_hashinfo.ehash) {
                DCCP_CRIT("Failed to allocate DCCP established hash table");
                goto out_free_bind_bucket_cachep;
        }

        for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
                INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
                INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
        }

        if (inet_ehash_locks_alloc(&dccp_hashinfo))
                goto out_free_dccp_ehash;

        bhash_order = ehash_order;

        do {
                dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
                                        sizeof(struct inet_bind_hashbucket);
                if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
                    bhash_order > 0)
                        continue;
                dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
                        __get_free_pages(GFP_ATOMIC, bhash_order);
        } while (!dccp_hashinfo.bhash && --bhash_order >= 0);

        if (!dccp_hashinfo.bhash) {
                DCCP_CRIT("Failed to allocate DCCP bind hash table");
                goto out_free_dccp_locks;
        }

        for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
                spin_lock_init(&dccp_hashinfo.bhash[i].lock);
                INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
        }

        rc = dccp_mib_init();
        if (rc)
                goto out_free_dccp_bhash;

        rc = dccp_ackvec_init();
        if (rc)
                goto out_free_dccp_mib;

        rc = dccp_sysctl_init();
        if (rc)
                goto out_ackvec_exit;

        dccp_timestamping_init();
out:
        return rc;
out_ackvec_exit:
        dccp_ackvec_exit();
out_free_dccp_mib:
        dccp_mib_exit();
out_free_dccp_bhash:
        free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
        dccp_hashinfo.bhash = NULL;
out_free_dccp_locks:
        inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
        free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
        dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        dccp_hashinfo.bind_bucket_cachep = NULL;
        goto out;
}

static void __exit dccp_fini(void)
{
        dccp_mib_exit();
        free_pages((unsigned long)dccp_hashinfo.bhash,
                   get_order(dccp_hashinfo.bhash_size *
                             sizeof(struct inet_bind_hashbucket)));
        free_pages((unsigned long)dccp_hashinfo.ehash,
                   get_order(dccp_hashinfo.ehash_size *
                             sizeof(struct inet_ehash_bucket)));
        inet_ehash_locks_free(&dccp_hashinfo);
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        dccp_ackvec_exit();
        dccp_sysctl_exit();
}

module_init(dccp_init);
module_exit(dccp_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");