/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>

#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/addrconf.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>

#include <asm/uaccess.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

/* Socket used for sending RSTs and ACKs */
static struct socket *tcp6_socket;

static void	tcp_v6_send_reset(struct sk_buff *skb);
static void	tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
static void	tcp_v6_send_check(struct sock *sk, int len,
				  struct sk_buff *skb);

static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static struct inet_connection_sock_af_ops ipv6_mapped;
static struct inet_connection_sock_af_ops ipv6_specific;

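/* Bind the socket to a local port, using the shared TCP bind hash table
 * and the IPv6-aware bind-conflict check.
 */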
static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
{
	return inet_csk_get_port(&tcp_hashinfo, sk, snum,
				 inet6_csk_bind_conflict);
}

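/* Add the socket to the TCP hash tables. v4-mapped sockets are hashed
 * through the IPv4 code (tcp_prot.hash) so that IPv4-side lookups keep
 * working.
 */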
static void tcp_v6_hash(struct sock *sk)
{
	if (sk->sk_state != TCP_CLOSE) {
		if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) {
			tcp_prot.hash(sk);
			return;
		}
		local_bh_disable();
		__inet6_hash(&tcp_hashinfo, sk);
		local_bh_enable();
	}
}

static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
				   struct in6_addr *saddr,
				   struct in6_addr *daddr,
				   unsigned long base)
{
	return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
}

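/* Choose the initial sequence number generator based on how the SYN
 * arrived: native IPv6 SYNs use the IPv6 ISN generator, IPv4 (v4-mapped)
 * SYNs use the IPv4 one.
 */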
static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IPV6)) {
		return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
						    skb->nh.ipv6h->saddr.s6_addr32,
						    skb->h.th->dest,
						    skb->h.th->source);
	} else {
		return secure_tcp_sequence_number(skb->nh.iph->daddr,
						  skb->nh.iph->saddr,
						  skb->h.th->dest,
						  skb->h.th->source);
	}
}

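/* Active open of an AF_INET6 TCP socket: handles flow labels and
 * link-local scope ids, and hands v4-mapped destinations over to
 * tcp_v4_connect().
 */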
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p = NULL, final;
	struct flowi fl;
	struct dst_entry *dst;
	int addr_type;
	int err;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl, 0, sizeof(fl));

	if (np->sndflow) {
		fl.fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl.fl6_flowlabel);
		if (fl.fl6_flowlabel & IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
			if (flowlabel == NULL)
				return -EINVAL;
			ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if (ipv6_addr_any(&usin->sin6_addr))
		usin->sin6_addr.s6_addr[15] = 0x1;

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type & IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (sk->sk_bound_dev_if &&
			    sk->sk_bound_dev_if != usin->sin6_scope_id)
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq = 0;
	}

	ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
	np->flow_label = fl.fl6_flowlabel;

	/*
	 *	TCP over IPv4
	 */

	if (addr_type == IPV6_ADDR_MAPPED) {
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
		struct sockaddr_in sin;

		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		icsk->icsk_af_ops = &ipv6_mapped;
		sk->sk_backlog_rcv = tcp_v4_do_rcv;

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			icsk->icsk_ext_hdr_len = exthdrlen;
			icsk->icsk_af_ops = &ipv6_specific;
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
			goto failure;
		} else {
			ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
				      inet->saddr);
			ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
				      inet->rcv_saddr);
		}

		return err;
	}

	if (!ipv6_addr_any(&np->rcv_saddr))
		saddr = &np->rcv_saddr;

	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
	ipv6_addr_copy(&fl.fl6_src,
		       (saddr ? saddr : &np->saddr));
	fl.oif = sk->sk_bound_dev_if;
	fl.fl_ip_dport = usin->sin6_port;
	fl.fl_ip_sport = inet->sport;

	if (np->opt && np->opt->srcrt) {
		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
		ipv6_addr_copy(&final, &fl.fl6_dst);
		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
		final_p = &final;
	}

	security_sk_classify_flow(sk, &fl);

	err = ip6_dst_lookup(sk, &dst, &fl);
	if (err)
		goto failure;
	if (final_p)
		ipv6_addr_copy(&fl.fl6_dst, final_p);

	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
		goto failure;

	if (saddr == NULL) {
		saddr = &fl.fl6_src;
		ipv6_addr_copy(&np->rcv_saddr, saddr);
	}

	/* set the source address */
	ipv6_addr_copy(&np->saddr, saddr);
	inet->rcv_saddr = LOOPBACK4_IPV6;

	sk->sk_gso_type = SKB_GSO_TCPV6;
	__ip6_dst_store(sk, dst, NULL);

	icsk->icsk_ext_hdr_len = 0;
	if (np->opt)
		icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
					  np->opt->opt_nflen);

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet6_hash_connect(&tcp_death_row, sk);
	if (err)
		goto late_failure;

	if (!tp->write_seq)
		tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
							     np->daddr.s6_addr32,
							     inet->sport,
							     inet->dport);

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
	__sk_dst_reset(sk);
failure:
	inet->dport = 0;
	sk->sk_route_caps = 0;
	return err;
}

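/* ICMPv6 error handler: look up the socket the error refers to, then
 * either adjust the path MTU (ICMPV6_PKT_TOOBIG), drop a matching
 * request_sock on a listener, or report the error to the user.
 */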
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		       int type, int code, int offset, __u32 info)
{
	struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
	struct ipv6_pinfo *np;
	struct sock *sk;
	int err;
	struct tcp_sock *tp;
	__u32 seq;

	sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr,
			  th->source, skb->dev->ifindex);

	if (sk == NULL) {
		ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
		return;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put((struct inet_timewait_sock *)sk);
		return;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = inet6_sk(sk);

	if (type == ICMPV6_PKT_TOOBIG) {
		struct dst_entry *dst = NULL;

		if (sock_owned_by_user(sk))
			goto out;
		if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
			goto out;

		/* icmp should have updated the destination cache entry */
		dst = __sk_dst_check(sk, np->dst_cookie);

		if (dst == NULL) {
			struct inet_sock *inet = inet_sk(sk);
			struct flowi fl;

			/* BUGGG_FUTURE: Again, it is not clear how
			   to handle the rthdr case. Ignore this complexity
			   for now.
			 */
			memset(&fl, 0, sizeof(fl));
			fl.proto = IPPROTO_TCP;
			ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
			ipv6_addr_copy(&fl.fl6_src, &np->saddr);
			fl.oif = sk->sk_bound_dev_if;
			fl.fl_ip_dport = inet->dport;
			fl.fl_ip_sport = inet->sport;
			security_skb_classify_flow(skb, &fl);

			if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
				sk->sk_err_soft = -err;
				goto out;
			}

			if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
				sk->sk_err_soft = -err;
				goto out;
			}

		} else
			dst_hold(dst);

		if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
			tcp_sync_mss(sk, dst_mtu(dst));
			tcp_simple_retransmit(sk);
		} /* else let the usual retransmit timer handle it */
		dst_release(dst);
		goto out;
	}

	icmpv6_err_convert(type, code, &err);

	/* Might be for a request_sock */
	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
					   &hdr->saddr, inet6_iif(skb));
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		 * an established socket here.
		 */
		BUG_TRAP(req->sk == NULL);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can, if SYNs are crossed. --ANK */
		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;
			sk->sk_error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

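/* Build and transmit a SYN|ACK for a queued request_sock, looking up a
 * route (and honouring any source routing header) when the caller did not
 * supply a destination entry.
 */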
static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
			      struct dst_entry *dst)
{
	struct inet6_request_sock *treq = inet6_rsk(req);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff * skb;
	struct ipv6_txoptions *opt = NULL;
	struct in6_addr * final_p = NULL, final;
	struct flowi fl;
	int err = -1;

	memset(&fl, 0, sizeof(fl));
	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
	ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
	fl.fl6_flowlabel = 0;
	fl.oif = treq->iif;
	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
	fl.fl_ip_sport = inet_sk(sk)->sport;
	security_sk_classify_flow(sk, &fl);

	if (dst == NULL) {
		opt = np->opt;
		if (opt == NULL &&
		    np->rxopt.bits.osrcrt == 2 &&
		    treq->pktopts) {
			struct sk_buff *pktopts = treq->pktopts;
			struct inet6_skb_parm *rxopt = IP6CB(pktopts);
			if (rxopt->srcrt)
				opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(pktopts->nh.raw + rxopt->srcrt));
		}

		if (opt && opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}

		err = ip6_dst_lookup(sk, &dst, &fl);
		if (err)
			goto done;
		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);
		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
			goto done;
	}

	skb = tcp_make_synack(sk, dst, req);
	if (skb) {
		struct tcphdr *th = skb->h.th;

		th->check = tcp_v6_check(th, skb->len,
					 &treq->loc_addr, &treq->rmt_addr,
					 csum_partial((char *)th, skb->len, skb->csum));

		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
		err = ip6_xmit(sk, skb, &fl, opt, 0);
		if (err == NET_XMIT_CN)
			err = 0;
	}

done:
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	dst_release(dst);
	return err;
}

static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
	if (inet6_rsk(req)->pktopts)
		kfree_skb(inet6_rsk(req)->pktopts);
}

static struct request_sock_ops tcp6_request_sock_ops = {
	.family		=	AF_INET6,
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.rtx_syn_ack	=	tcp_v6_send_synack,
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
	.send_reset	=	tcp_v6_send_reset
};

static struct timewait_sock_ops tcp6_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
};

static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcphdr *th = skb->h.th;

	if (skb->ip_summed == CHECKSUM_HW) {
		th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
		skb->csum = offsetof(struct tcphdr, check);
	} else {
		th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
					    csum_partial((char *)th, th->doff<<2,
							 skb->csum));
	}
}

static int tcp_v6_gso_send_check(struct sk_buff *skb)
{
	struct ipv6hdr *ipv6h;
	struct tcphdr *th;

	if (!pskb_may_pull(skb, sizeof(*th)))
		return -EINVAL;

	ipv6h = skb->nh.ipv6h;
	th = skb->h.th;

	th->check = 0;
	th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
				     IPPROTO_TCP, 0);
	skb->csum = offsetof(struct tcphdr, check);
	skb->ip_summed = CHECKSUM_HW;
	return 0;
}

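/* Send a RST in response to a bad segment. The reply is built from the
 * offending skb alone and routed without a socket, so it also works for
 * segments that matched no socket at all.
 */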
static void tcp_v6_send_reset(struct sk_buff *skb)
{
	struct tcphdr *th = skb->h.th, *t1;
	struct sk_buff *buff;
	struct flowi fl;

	if (th->rst)
		return;

	if (!ipv6_unicast_destination(skb))
		return;

	/*
	 * We need to grab some memory, and put together an RST,
	 * and then put it into the queue to be sent.
	 */

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
			 GFP_ATOMIC);
	if (buff == NULL)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));

	t1 = (struct tcphdr *) skb_push(buff, sizeof(struct tcphdr));

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = sizeof(*t1)/4;
	t1->rst = 1;

	if (th->ack) {
		t1->seq = th->ack_seq;
	} else {
		t1->ack = 1;
		t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
				    + skb->len - (th->doff<<2));
	}

	buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);

	memset(&fl, 0, sizeof(fl));
	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
				    sizeof(*t1), IPPROTO_TCP,
				    buff->csum);

	fl.proto = IPPROTO_TCP;
	fl.oif = inet6_iif(skb);
	fl.fl_ip_dport = t1->dest;
	fl.fl_ip_sport = t1->source;
	security_skb_classify_flow(skb, &fl);

	/* sk = NULL, but it is safe for now. RST socket required. */
	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {

		if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
			ip6_xmit(tcp6_socket->sk, buff, &fl, NULL, 0);
			TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
			TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
			return;
		}
	}

	kfree_skb(buff);
}

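/* Send a bare ACK, optionally carrying a timestamp option. Used both for
 * TIME_WAIT sockets and for ACKing segments aimed at a request_sock.
 */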
static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
{
	struct tcphdr *th = skb->h.th, *t1;
	struct sk_buff *buff;
	struct flowi fl;
	int tot_len = sizeof(struct tcphdr);

	if (ts)
		tot_len += 3*4;

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
			 GFP_ATOMIC);
	if (buff == NULL)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

	t1 = (struct tcphdr *) skb_push(buff, tot_len);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len/4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = 1;
	t1->window = htons(win);

	if (ts) {
		u32 *ptr = (u32*)(t1 + 1);
		*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
			       (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*ptr++ = htonl(tcp_time_stamp);
		*ptr = htonl(ts);
	}

	buff->csum = csum_partial((char *)t1, tot_len, 0);

	memset(&fl, 0, sizeof(fl));
	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
				    tot_len, IPPROTO_TCP,
				    buff->csum);

	fl.proto = IPPROTO_TCP;
	fl.oif = inet6_iif(skb);
	fl.fl_ip_dport = t1->dest;
	fl.fl_ip_sport = t1->source;
	security_skb_classify_flow(skb, &fl);

	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
		if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
			ip6_xmit(tcp6_socket->sk, buff, &fl, NULL, 0);
			TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
			return;
		}
	}

	kfree_skb(buff);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcptw->tw_ts_recent);

	inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
{
	tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
}

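/* For segments received on a listening socket: first check the SYN queue
 * for a matching request_sock, then the established/TIME_WAIT hash for an
 * already existing connection.
 */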
static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct request_sock *req, **prev;
	const struct tcphdr *th = skb->h.th;
	struct sock *nsk;

	/* Find possible connection requests. */
	req = inet6_csk_search_req(sk, &prev, th->source,
				   &skb->nh.ipv6h->saddr,
				   &skb->nh.ipv6h->daddr, inet6_iif(skb));
	if (req)
		return tcp_check_req(sk, skb, req, prev);

	nsk = __inet6_lookup_established(&tcp_hashinfo, &skb->nh.ipv6h->saddr,
					 th->source, &skb->nh.ipv6h->daddr,
					 ntohs(th->dest), inet6_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		inet_twsk_put((struct inet_timewait_sock *)nsk);
		return NULL;
	}

#if 0 /*def CONFIG_SYN_COOKIES*/
	if (!th->rst && !th->syn && th->ack)
		sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}

/* FIXME: this is substantially similar to the ipv4 code.
 * Can some kind of merge be done? -- erics
 */
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct inet6_request_sock *treq;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_options_received tmp_opt;
	struct tcp_sock *tp = tcp_sk(sk);
	struct request_sock *req = NULL;
	__u32 isn = TCP_SKB_CB(skb)->when;

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	/*
	 *	There are no SYN attacks on IPv6, yet...
	 */
	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
		if (net_ratelimit())
			printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
		goto drop;
	}

	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;

	req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
	if (req == NULL)
		goto drop;

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
	tmp_opt.user_mss = tp->rx_opt.user_mss;

	tcp_parse_options(skb, &tmp_opt, 0);

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb);

	treq = inet6_rsk(req);
	ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
	TCP_ECN_create_request(req, skb->h.th);
	treq->pktopts = NULL;
	if (ipv6_opt_accepted(sk, skb) ||
	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
	    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
		atomic_inc(&skb->users);
		treq->pktopts = skb;
	}
	treq->iif = sk->sk_bound_dev_if;

	/* So that link locals have meaning */
	if (!sk->sk_bound_dev_if &&
	    ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
		treq->iif = inet6_iif(skb);

	if (isn == 0)
		isn = tcp_v6_init_sequence(sk, skb);

	tcp_rsk(req)->snt_isn = isn;

	if (tcp_v6_send_synack(sk, req, NULL))
		goto drop;

	inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
	return 0;

drop:
	if (req)
		reqsk_free(req);

	return 0; /* don't send reset */
}

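/* Create the child socket once the 3-way handshake completes. Requests
 * that arrived as IPv4 packets produce a v4-mapped child via the IPv4
 * code.
 */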
static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
					  struct request_sock *req,
					  struct dst_entry *dst)
{
	struct inet6_request_sock *treq = inet6_rsk(req);
	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
	struct tcp6_sock *newtcp6sk;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
	struct ipv6_txoptions *opt;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */

		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);

		if (newsk == NULL)
			return NULL;

		newtcp6sk = (struct tcp6_sock *)newsk;
		inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

		newinet = inet_sk(newsk);
		newnp = inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
			      newinet->daddr);

		ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
			      newinet->saddr);

		ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);

		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
		newnp->pktoptions = NULL;
		newnp->opt	  = NULL;
		newnp->mcast_oif  = inet6_iif(skb);
		newnp->mcast_hops = skb->nh.ipv6h->hop_limit;

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* This is a tricky place. Until this moment the IPv4 code
		   has been working with the IPv6 icsk.icsk_af_ops.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

		return newsk;
	}

	opt = np->opt;

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	if (np->rxopt.bits.osrcrt == 2 &&
	    opt == NULL && treq->pktopts) {
		struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
		if (rxopt->srcrt)
			opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
	}

	if (dst == NULL) {
		struct in6_addr *final_p = NULL, final;
		struct flowi fl;

		memset(&fl, 0, sizeof(fl));
		fl.proto = IPPROTO_TCP;
		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
		if (opt && opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}
		ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
		fl.oif = sk->sk_bound_dev_if;
		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
		fl.fl_ip_sport = inet_sk(sk)->sport;
		security_sk_classify_flow(sk, &fl);

		if (ip6_dst_lookup(sk, &dst, &fl))
			goto out;

		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
			goto out;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (newsk == NULL)
		goto out;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	newsk->sk_gso_type = SKB_GSO_TCPV6;
	__ip6_dst_store(newsk, dst, NULL);

	newtcp6sk = (struct tcp6_sock *)newsk;
	inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
	ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
	ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
	newsk->sk_bound_dev_if = treq->iif;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newinet->opt = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	/* Clone pktoptions received with SYN */
	newnp->pktoptions = NULL;
	if (treq->pktopts != NULL) {
		newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
		kfree_skb(treq->pktopts);
		treq->pktopts = NULL;
		if (newnp->pktoptions)
			skb_set_owner_r(newnp->pktoptions, newsk);
	}
	newnp->opt	  = NULL;
	newnp->mcast_oif  = inet6_iif(skb);
	newnp->mcast_hops = skb->nh.ipv6h->hop_limit;

	/* Clone native IPv6 options from the listening socket (if any).

	   Yes, keeping a reference count would be much more clever, but we
	   do one more thing here: reattach the optmem to newsk.
	 */
	if (opt) {
		newnp->opt = ipv6_dup_options(newsk, opt);
		if (opt != np->opt)
			sock_kfree_s(sk, opt, opt->tot_len);
	}

	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (newnp->opt)
		inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
						     newnp->opt->opt_flen);

	tcp_mtup_init(newsk);
	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
	tcp_initialize_rcv_mss(newsk);

	newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;

	__inet6_hash(&tcp_hashinfo, newsk);
	inet_inherit_port(&tcp_hashinfo, sk, newsk);

	return newsk;

out_overflow:
	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
out:
	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	dst_release(dst);
	return NULL;
}

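/* Verify or set up the TCP checksum on receive. With a hardware checksum
 * the pseudo-header check is enough; otherwise short packets (<= 76 bytes)
 * are checksummed immediately and longer ones are left for copy time.
 */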
static int tcp_v6_checksum_init(struct sk_buff *skb)
{
	if (skb->ip_summed == CHECKSUM_HW) {
		if (!tcp_v6_check(skb->h.th, skb->len, &skb->nh.ipv6h->saddr,
				  &skb->nh.ipv6h->daddr, skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			return 0;
		}
	}

	skb->csum = ~tcp_v6_check(skb->h.th, skb->len, &skb->nh.ipv6h->saddr,
				  &skb->nh.ipv6h->daddr, 0);

	if (skb->len <= 76) {
		return __skb_checksum_complete(skb);
	}
	return 0;
}

/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp;
	struct sk_buff *opt_skb = NULL;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but that is not the case with
	   tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	if (sk_filter(sk, skb, 0))
		goto discard;

	/*
	 *	socket locking is here for SMP purposes as backlog rcv
	 *	is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code where we
	   can do this without affecting IPv4.
	   The rest of the code is protocol independent,
	   and I do not like the idea of uglifying IPv4.

	   Actually, the whole idea behind IPV6_PKTOPTIONS does not
	   look very well thought out. For now we latch the options
	   received in the last packet enqueued by tcp.
	   Feel free to propose a better solution. --ANK (980728)
	 */
	if (np->rxopt.all)
		opt_skb = skb_clone(skb, GFP_ATOMIC);

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		TCP_CHECK_TIMER(sk);
		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
			goto reset;
		TCP_CHECK_TIMER(sk);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		/*
		 * Queue it on the new socket if the new socket is active,
		 * otherwise we just shortcircuit this and continue with
		 * the new socket..
		 */
		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	}

	TCP_CHECK_TIMER(sk);
	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
		goto reset;
	TCP_CHECK_TIMER(sk);
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb(skb);
	return 0;
csum_err:
	TCP_INC_STATS_BH(TCP_MIB_INERRS);
	goto discard;


ipv6_pktoptions:
	/* So what is this about?

	   1. The skb was enqueued by tcp.
	   2. The skb was added to the tail of the read queue, not out of order.
	   3. The socket is not in passive state.
	   4. Finally, it really contains options, which the user wants to
	      receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			np->mcast_oif = inet6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
		if (ipv6_opt_accepted(sk, opt_skb)) {
			skb_set_owner_r(opt_skb, sk);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	if (opt_skb)
		kfree_skb(opt_skb);
	return 0;
}

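/* Main receive path, called from the IPv6 stack for every TCP segment:
 * validate the header and checksum, look up the owning socket and hand the
 * segment over directly, via the prequeue, or via the backlog.
 */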
static int tcp_v6_rcv(struct sk_buff **pskb)
{
	struct sk_buff *skb = *pskb;
	struct tcphdr *th;
	struct sock *sk;
	int ret;

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 *	Count it even if it's bad.
	 */
	TCP_INC_STATS_BH(TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = skb->h.th;

	if (th->doff < sizeof(struct tcphdr)/4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff*4))
		goto discard_it;

	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
	     tcp_v6_checksum_init(skb)))
		goto bad_packet;

	th = skb->h.th;
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff*4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when = 0;
	TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
	TCP_SKB_CB(skb)->sacked = 0;

	sk = __inet6_lookup(&tcp_hashinfo, &skb->nh.ipv6h->saddr, th->source,
			    &skb->nh.ipv6h->daddr, ntohs(th->dest),
			    inet6_iif(skb));

	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (sk_filter(sk, skb, 0))
		goto discard_and_relse;

	skb->dev = NULL;

	bh_lock_sock(sk);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
#ifdef CONFIG_NET_DMA
		struct tcp_sock *tp = tcp_sk(sk);
		if (tp->ucopy.dma_chan)
			ret = tcp_v6_do_rcv(sk, skb);
		else
#endif
		{
			if (!tcp_prequeue(sk, skb))
				ret = tcp_v6_do_rcv(sk, skb);
		}
	} else
		sk_add_backlog(sk, skb);
	bh_unlock_sock(sk);

	sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
bad_packet:
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(skb);
	}

discard_it:

	/*
	 *	Discard frame
	 */

	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put((struct inet_timewait_sock *)sk);
		goto discard_it;
	}

	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
		inet_twsk_put((struct inet_timewait_sock *)sk);
		goto discard_it;
	}

	switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk,
					   skb, th)) {
	case TCP_TW_SYN:
	{
		struct sock *sk2;

		sk2 = inet6_lookup_listener(&tcp_hashinfo,
					    &skb->nh.ipv6h->daddr,
					    ntohs(th->dest), inet6_iif(skb));
		if (sk2 != NULL) {
			struct inet_timewait_sock *tw = inet_twsk(sk);
			inet_twsk_deschedule(tw, &tcp_death_row);
			inet_twsk_put(tw);
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v6_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}

static int tcp_v6_remember_stamp(struct sock *sk)
{
	/* Alas, not yet... */
	return 0;
}

static struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit	   = inet6_csk_xmit,
	.send_check	   = tcp_v6_send_check,
	.rebuild_header	   = inet6_sk_rebuild_header,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.remember_stamp	   = tcp_v6_remember_stamp,
	.net_header_len	   = sizeof(struct ipv6hdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ipv6_setsockopt,
	.compat_getsockopt = compat_ipv6_getsockopt,
#endif
};

/*
 *	TCP over IPv4 via INET6 API
 */

static struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.remember_stamp	   = tcp_v4_remember_stamp,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ipv6_setsockopt,
	.compat_getsockopt = compat_ipv6_getsockopt,
#endif
};

/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	skb_queue_head_init(&tp->out_of_order_queue);
	tcp_init_xmit_timers(sk);
	tcp_prequeue_init(tp);

	icsk->icsk_rto = TCP_TIMEOUT_INIT;
	tp->mdev = TCP_TIMEOUT_INIT;

	/* So many TCP implementations out there (incorrectly) count the
	 * initial SYN frame in their delayed-ACK and congestion control
	 * algorithms that we must have the following bandaid to talk
	 * efficiently to them.  -DaveM
	 */
	tp->snd_cwnd = 2;

	/* See draft-stevens-tcpca-spec-01 for discussion of the
	 * initialization of these values.
	 */
	tp->snd_ssthresh = 0x7fffffff;
	tp->snd_cwnd_clamp = ~0;
	tp->mss_cache = 536;

	tp->reordering = sysctl_tcp_reordering;

	sk->sk_state = TCP_CLOSE;

	icsk->icsk_af_ops = &ipv6_specific;
	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
	icsk->icsk_sync_mss = tcp_sync_mss;
	sk->sk_write_space = sk_stream_write_space;
	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

	sk->sk_sndbuf = sysctl_tcp_wmem[1];
	sk->sk_rcvbuf = sysctl_tcp_rmem[1];

	atomic_inc(&tcp_sockets_allocated);

	return 0;
}

static int tcp_v6_destroy_sock(struct sock *sk)
{
	tcp_v4_destroy_sock(sk);
	return inet6_destroy_sock(sk);
}

/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 struct sock *sk, struct request_sock *req, int i, int uid)
{
	int ttd = req->expires - jiffies;
	struct in6_addr *src = &inet6_rsk(req)->loc_addr;
	struct in6_addr *dest = &inet6_rsk(req)->rmt_addr;

	if (ttd < 0)
		ttd = 0;

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   ntohs(inet_sk(sk)->sport),
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,    /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->retrans,
		   uid,
		   0,    /* non standard timer */
		   0,    /* open_requests have no inode */
		   0, req);
}

static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	struct inet_sock *inet = inet_sk(sp);
	struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	struct ipv6_pinfo *np = inet6_sk(sp);

	dest  = &np->daddr;
	src   = &np->rcv_saddr;
	destp = ntohs(inet->dport);
	srcp  = ntohs(inet->sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires	= jiffies;
	}

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   sp->sk_state,
		   tp->write_seq-tp->snd_una,
		   (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
		   timer_active,
		   jiffies_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   sock_i_uid(sp),
		   icsk->icsk_probes_out,
		   sock_i_ino(sp),
		   atomic_read(&sp->sk_refcnt), sp,
		   icsk->icsk_rto,
		   icsk->icsk_ack.ato,
		   (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
		   tp->snd_cwnd, tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh
		   );
}

static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	struct in6_addr *dest, *src;
	__u16 destp, srcp;
	struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
	int ttd = tw->tw_ttd - jiffies;

	if (ttd < 0)
		ttd = 0;

	dest  = &tw6->tw_v6_daddr;
	src   = &tw6->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
		   atomic_read(&tw->tw_refcnt), tw);
}

#ifdef CONFIG_PROC_FS
static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 " sl "
			 "local_address "
			 "remote_address "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 " uid timeout inode\n");
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		get_tcp6_sock(seq, v, st->num);
		break;
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
		get_timewait6_sock(seq, v, st->num);
		break;
	}
out:
	return 0;
}

static struct file_operations tcp6_seq_fops;
static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.owner		= THIS_MODULE,
	.name		= "tcp6",
	.family		= AF_INET6,
	.seq_show	= tcp6_seq_show,
	.seq_fops	= &tcp6_seq_fops,
};

int __init tcp6_proc_init(void)
{
	return tcp_proc_register(&tcp6_seq_afinfo);
}

void tcp6_proc_exit(void)
{
	tcp_proc_unregister(&tcp6_seq_afinfo);
}
#endif

struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.sendmsg		= tcp_sendmsg,
	.recvmsg		= tcp_recvmsg,
	.backlog_rcv		= tcp_v6_do_rcv,
	.hash			= tcp_v6_hash,
	.unhash			= tcp_unhash,
	.get_port		= tcp_v6_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
};

static struct inet6_protocol tcpv6_protocol = {
	.handler	= tcp_v6_rcv,
	.err_handler	= tcp_v6_err,
	.gso_send_check	= tcp_v6_gso_send_check,
	.gso_segment	= tcp_tso_segment,
	.flags		= INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

static struct inet_protosw tcpv6_protosw = {
	.type		= SOCK_STREAM,
	.protocol	= IPPROTO_TCP,
	.prot		= &tcpv6_prot,
	.ops		= &inet6_stream_ops,
	.capability	= -1,
	.no_check	= 0,
	.flags		= INET_PROTOSW_PERMANENT |
			  INET_PROTOSW_ICSK,
};

void __init tcpv6_init(void)
{
	/* register inet6 protocol */
	if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
		printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
	inet6_register_protosw(&tcpv6_protosw);

	if (inet_csk_ctl_sock_create(&tcp6_socket, PF_INET6, SOCK_RAW,
				     IPPROTO_TCP) < 0)
		panic("Failed to create the TCPv6 control socket.\n");
}