]> bbs.cooldavid.org Git - net-next-2.6.git/blob - net/ipv6/tcp_ipv6.c
[MLSXFRM]: Add flow labeling
[net-next-2.6.git] / net / ipv6 / tcp_ipv6.c
1 /*
2  *      TCP over IPv6
3  *      Linux INET6 implementation 
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>     
7  *
8  *      $Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
9  *
10  *      Based on: 
11  *      linux/net/ipv4/tcp.c
12  *      linux/net/ipv4/tcp_input.c
13  *      linux/net/ipv4/tcp_output.c
14  *
15  *      Fixes:
16  *      Hideaki YOSHIFUJI       :       sin6_scope_id support
17  *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
18  *      Alexey Kuznetsov                allow both IPv4 and IPv6 sockets to bind
19  *                                      a single port at the same time.
20  *      YOSHIFUJI Hideaki @USAGI:       convert /proc/net/tcp6 to seq_file.
21  *
22  *      This program is free software; you can redistribute it and/or
23  *      modify it under the terms of the GNU General Public License
24  *      as published by the Free Software Foundation; either version
25  *      2 of the License, or (at your option) any later version.
26  */
27
28 #include <linux/module.h>
29 #include <linux/errno.h>
30 #include <linux/types.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/jiffies.h>
35 #include <linux/in.h>
36 #include <linux/in6.h>
37 #include <linux/netdevice.h>
38 #include <linux/init.h>
39 #include <linux/jhash.h>
40 #include <linux/ipsec.h>
41 #include <linux/times.h>
42
43 #include <linux/ipv6.h>
44 #include <linux/icmpv6.h>
45 #include <linux/random.h>
46
47 #include <net/tcp.h>
48 #include <net/ndisc.h>
49 #include <net/inet6_hashtables.h>
50 #include <net/inet6_connection_sock.h>
51 #include <net/ipv6.h>
52 #include <net/transp_v6.h>
53 #include <net/addrconf.h>
54 #include <net/ip6_route.h>
55 #include <net/ip6_checksum.h>
56 #include <net/inet_ecn.h>
57 #include <net/protocol.h>
58 #include <net/xfrm.h>
59 #include <net/addrconf.h>
60 #include <net/snmp.h>
61 #include <net/dsfield.h>
62 #include <net/timewait_sock.h>
63
64 #include <asm/uaccess.h>
65
66 #include <linux/proc_fs.h>
67 #include <linux/seq_file.h>
68
69 /* Socket used for sending RSTs and ACKs */
70 static struct socket *tcp6_socket;
71
72 static void     tcp_v6_send_reset(struct sk_buff *skb);
73 static void     tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
74 static void     tcp_v6_send_check(struct sock *sk, int len, 
75                                   struct sk_buff *skb);
76
77 static int      tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
78
79 static struct inet_connection_sock_af_ops ipv6_mapped;
80 static struct inet_connection_sock_af_ops ipv6_specific;
81
82 static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
83 {
84         return inet_csk_get_port(&tcp_hashinfo, sk, snum,
85                                  inet6_csk_bind_conflict);
86 }
87
88 static void tcp_v6_hash(struct sock *sk)
89 {
90         if (sk->sk_state != TCP_CLOSE) {
91                 if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) {
92                         tcp_prot.hash(sk);
93                         return;
94                 }
95                 local_bh_disable();
96                 __inet6_hash(&tcp_hashinfo, sk);
97                 local_bh_enable();
98         }
99 }
100
/*
 * Finish the TCP-over-IPv6 checksum: fold the IPv6 pseudo-header
 * (saddr, daddr, length, IPPROTO_TCP) into the partial sum @base
 * accumulated over the TCP header and payload.
 * The @th parameter is unused here; it is kept for signature
 * symmetry with the IPv4 helper.
 */
static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
				   struct in6_addr *saddr, 
				   struct in6_addr *daddr, 
				   unsigned long base)
{
	return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
}
108
109 static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
110 {
111         if (skb->protocol == htons(ETH_P_IPV6)) {
112                 return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
113                                                     skb->nh.ipv6h->saddr.s6_addr32,
114                                                     skb->h.th->dest,
115                                                     skb->h.th->source);
116         } else {
117                 return secure_tcp_sequence_number(skb->nh.iph->daddr,
118                                                   skb->nh.iph->saddr,
119                                                   skb->h.th->dest,
120                                                   skb->h.th->source);
121         }
122 }
123
/*
 * Active open (connect(2)) for an IPv6 TCP socket.
 *
 * Validates the destination address, resolves a route (honouring any
 * flow label and type-0 routing header), chooses a source address,
 * hashes the socket into the connecting table and sends the SYN.
 * A v4-mapped destination is redirected to tcp_v4_connect() after
 * switching the socket's af_ops to the mapped vector.
 *
 * Returns 0 on success or a negative errno; on failure dport and
 * sk_route_caps are reset so the socket can be reused.
 */
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, 
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p = NULL, final;
	struct flowi fl;
	struct dst_entry *dst;
	int addr_type;
	int err;

	if (addr_len < SIN6_LEN_RFC2133) 
		return -EINVAL;

	if (usin->sin6_family != AF_INET6) 
		return(-EAFNOSUPPORT);

	memset(&fl, 0, sizeof(fl));

	/* IPV6_FLOWINFO_SEND: take flow label from the caller; a
	 * non-zero label must match an existing flow-label lease. */
	if (np->sndflow) {
		fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl.fl6_flowlabel);
		if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
			if (flowlabel == NULL)
				return -EINVAL;
			/* Label carries its own destination address. */
			ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 */
	
	if(ipv6_addr_any(&usin->sin6_addr))
		usin->sin6_addr.s6_addr[15] = 0x1; 

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if(addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type&IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (sk->sk_bound_dev_if &&
			    sk->sk_bound_dev_if != usin->sin6_scope_id)
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	/* New peer: stale timestamp state must not leak into the new
	 * connection (PAWS), and the ISN is regenerated below. */
	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq = 0;
	}

	ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
	np->flow_label = fl.fl6_flowlabel;

	/*
	 *	TCP over IPv4
	 */

	if (addr_type == IPV6_ADDR_MAPPED) {
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
		struct sockaddr_in sin;

		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		/* Switch to the v4-mapped ops before handing off; undo
		 * on failure so the socket stays a native v6 socket. */
		icsk->icsk_af_ops = &ipv6_mapped;
		sk->sk_backlog_rcv = tcp_v4_do_rcv;

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			icsk->icsk_ext_hdr_len = exthdrlen;
			icsk->icsk_af_ops = &ipv6_specific;
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
			goto failure;
		} else {
			/* Mirror the chosen IPv4 addresses as ::ffff:a.b.c.d */
			ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
				      inet->saddr);
			ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
				      inet->rcv_saddr);
		}

		return err;
	}

	if (!ipv6_addr_any(&np->rcv_saddr))
		saddr = &np->rcv_saddr;

	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
	ipv6_addr_copy(&fl.fl6_src,
		       (saddr ? saddr : &np->saddr));
	fl.oif = sk->sk_bound_dev_if;
	fl.fl_ip_dport = usin->sin6_port;
	fl.fl_ip_sport = inet->sport;

	/* Type-0 routing header: route to the first hop, remember the
	 * real destination in @final for after the lookup. */
	if (np->opt && np->opt->srcrt) {
		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
		ipv6_addr_copy(&final, &fl.fl6_dst);
		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
		final_p = &final;
	}

	security_sk_classify_flow(sk, &fl);

	err = ip6_dst_lookup(sk, &dst, &fl);
	if (err)
		goto failure;
	if (final_p)
		ipv6_addr_copy(&fl.fl6_dst, final_p);

	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
		goto failure;

	/* No bound source address: adopt the one routing selected. */
	if (saddr == NULL) {
		saddr = &fl.fl6_src;
		ipv6_addr_copy(&np->rcv_saddr, saddr);
	}

	/* set the source address */
	ipv6_addr_copy(&np->saddr, saddr);
	inet->rcv_saddr = LOOPBACK4_IPV6;

	sk->sk_gso_type = SKB_GSO_TCPV6;
	__ip6_dst_store(sk, dst, NULL);

	icsk->icsk_ext_hdr_len = 0;
	if (np->opt)
		icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
					  np->opt->opt_nflen);

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet6_hash_connect(&tcp_death_row, sk);
	if (err)
		goto late_failure;

	if (!tp->write_seq)
		tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
							     np->daddr.s6_addr32,
							     inet->sport,
							     inet->dport);

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
	__sk_dst_reset(sk);
failure:
	inet->dport = 0;
	sk->sk_route_caps = 0;
	return err;
}
311
/*
 * ICMPv6 error handler for TCP.
 *
 * @skb holds the ICMPv6 message; the offending IPv6 header is at
 * skb->data and the embedded TCP header at skb->data + @offset.
 * Looks up the socket the error refers to, validates that the quoted
 * sequence number is in flight, then either adjusts the path MTU
 * (ICMPV6_PKT_TOOBIG), drops a pending request_sock, aborts a
 * connecting socket, or records the error on the socket.
 *
 * Runs in BH context; the socket is bh-locked for the duration and
 * the lookup reference is dropped on exit.
 */
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		int type, int code, int offset, __u32 info)
{
	struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
	struct ipv6_pinfo *np;
	struct sock *sk;
	int err;
	struct tcp_sock *tp; 
	__u32 seq;

	sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr,
			  th->source, skb->dev->ifindex);

	if (sk == NULL) {
		ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
		return;
	}

	/* Nothing useful to do for a time-wait socket; just drop the ref. */
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put((struct inet_timewait_sock *)sk);
		return;
	}

	bh_lock_sock(sk);
	/* NOTE(review): only the statistic is bumped here; processing
	 * continues even when the socket is owned by user context, and
	 * the user-owned cases are filtered individually below. */
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	tp = tcp_sk(sk);
	seq = ntohl(th->seq); 
	/* The quoted segment must be unacknowledged in-flight data,
	 * otherwise the ICMP is stale or forged. */
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = inet6_sk(sk);

	if (type == ICMPV6_PKT_TOOBIG) {
		struct dst_entry *dst = NULL;

		if (sock_owned_by_user(sk))
			goto out;
		if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
			goto out;

		/* icmp should have updated the destination cache entry */
		dst = __sk_dst_check(sk, np->dst_cookie);

		if (dst == NULL) {
			struct inet_sock *inet = inet_sk(sk);
			struct flowi fl;

			/* BUGGG_FUTURE: Again, it is not clear how
			   to handle rthdr case. Ignore this complexity
			   for now.
			 */
			memset(&fl, 0, sizeof(fl));
			fl.proto = IPPROTO_TCP;
			ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
			ipv6_addr_copy(&fl.fl6_src, &np->saddr);
			fl.oif = sk->sk_bound_dev_if;
			fl.fl_ip_dport = inet->dport;
			fl.fl_ip_sport = inet->sport;
			security_skb_classify_flow(skb, &fl);

			if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
				sk->sk_err_soft = -err;
				goto out;
			}

			if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
				sk->sk_err_soft = -err;
				goto out;
			}

		} else
			dst_hold(dst);

		/* MTU shrank below our cached value: clamp the MSS and
		 * retransmit what no longer fits. */
		if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
			tcp_sync_mss(sk, dst_mtu(dst));
			tcp_simple_retransmit(sk);
		} /* else let the usual retransmit timer handle it */
		dst_release(dst);
		goto out;
	}

	icmpv6_err_convert(type, code, &err);

	/* Might be for an request_sock */
	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
					   &hdr->saddr, inet6_iif(skb));
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		 * an established socket here.
		 */
		BUG_TRAP(req->sk == NULL);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can, it SYNs are crossed. --ANK */ 
		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;
			sk->sk_error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	}

	/* Established (or other) state: report hard only if the
	 * application asked for errors via IPV6_RECVERR. */
	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}
452
453
/*
 * Build and transmit a SYN|ACK for the pending request @req.
 *
 * If the caller did not supply a route (@dst == NULL) one is resolved
 * here, honouring the listener's source-routing options (or a routing
 * header inverted from the client's SYN when osrcrt == 2).  Any
 * option block allocated by ipv6_invert_rthdr() is freed before
 * return; the dst reference is always released.
 *
 * Returns 0 on success (NET_XMIT_CN is treated as success) or a
 * negative error.
 */
static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
			      struct dst_entry *dst)
{
	struct inet6_request_sock *treq = inet6_rsk(req);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff * skb;
	struct ipv6_txoptions *opt = NULL;
	struct in6_addr * final_p = NULL, final;
	struct flowi fl;
	int err = -1;

	memset(&fl, 0, sizeof(fl));
	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
	ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
	fl.fl6_flowlabel = 0;
	fl.oif = treq->iif;
	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
	fl.fl_ip_sport = inet_sk(sk)->sport;
	security_sk_classify_flow(sk, &fl);

	if (dst == NULL) {
		opt = np->opt;
		/* osrcrt == 2: reflect the routing header found in the
		 * client's SYN back at it. */
		if (opt == NULL &&
		    np->rxopt.bits.osrcrt == 2 &&
		    treq->pktopts) {
			struct sk_buff *pktopts = treq->pktopts;
			struct inet6_skb_parm *rxopt = IP6CB(pktopts);
			if (rxopt->srcrt)
				opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
		}

		/* Route via the first hop of a type-0 routing header,
		 * restoring the true destination after the lookup. */
		if (opt && opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}

		err = ip6_dst_lookup(sk, &dst, &fl);
		if (err)
			goto done;
		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);
		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
			goto done;
	}

	skb = tcp_make_synack(sk, dst, req);
	if (skb) {
		struct tcphdr *th = skb->h.th;

		th->check = tcp_v6_check(th, skb->len,
					 &treq->loc_addr, &treq->rmt_addr,
					 csum_partial((char *)th, skb->len, skb->csum));

		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
		err = ip6_xmit(sk, skb, &fl, opt, 0);
		if (err == NET_XMIT_CN)
			err = 0;
	}

done:
	/* Free an option block we created via ipv6_invert_rthdr();
	 * np->opt belongs to the socket and must survive. */
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	dst_release(dst);
	return err;
}
522
523 static void tcp_v6_reqsk_destructor(struct request_sock *req)
524 {
525         if (inet6_rsk(req)->pktopts)
526                 kfree_skb(inet6_rsk(req)->pktopts);
527 }
528
/* Operations for embryonic (SYN_RECV) IPv6 TCP connections. */
static struct request_sock_ops tcp6_request_sock_ops = {
	.family		=	AF_INET6,
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.rtx_syn_ack	=	tcp_v6_send_synack,	/* (re)transmit SYN|ACK */
	.send_ack	=	tcp_v6_reqsk_send_ack,	/* ACK a retransmitted SYN */
	.destructor	=	tcp_v6_reqsk_destructor,	/* free held pktopts skb */
	.send_reset	=	tcp_v6_send_reset	/* RST a bad segment */
};
537
/* Time-wait minisock operations for IPv6 TCP. */
static struct timewait_sock_ops tcp6_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,	/* allow port reuse against a tw socket */
};
542
/*
 * Fill in the TCP checksum for an outgoing segment of @len bytes.
 *
 * With hardware checksum offload (CHECKSUM_HW) only the pseudo-header
 * sum is stored (complemented, per the offload convention) and
 * skb->csum records the offset of the check field for the NIC to
 * finish.  Otherwise the full checksum is computed in software over
 * the header plus the payload sum already in skb->csum.
 */
static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcphdr *th = skb->h.th;

	if (skb->ip_summed == CHECKSUM_HW) {
		th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,  0);
		skb->csum = offsetof(struct tcphdr, check);
	} else {
		th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 
					    csum_partial((char *)th, th->doff<<2, 
							 skb->csum));
	}
}
557
/*
 * Prepare the checksum fields of a GSO super-packet before
 * segmentation: seed th->check with the complemented pseudo-header
 * sum and mark the skb for hardware-style completion.
 * Returns 0 on success, -EINVAL if the TCP header is not pullable.
 */
static int tcp_v6_gso_send_check(struct sk_buff *skb)
{
	struct ipv6hdr *ipv6h;
	struct tcphdr *th;

	if (!pskb_may_pull(skb, sizeof(*th)))
		return -EINVAL;

	ipv6h = skb->nh.ipv6h;
	th = skb->h.th;

	/* Zero first so the header itself doesn't perturb the sum. */
	th->check = 0;
	th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
				     IPPROTO_TCP, 0);
	skb->csum = offsetof(struct tcphdr, check);
	skb->ip_summed = CHECKSUM_HW;
	return 0;
}
576
/*
 * Send a RST in response to the segment in @skb.
 *
 * No socket is associated with the reply; it is built from scratch
 * with source/destination swapped from the offending segment and sent
 * through the module-private tcp6_socket.  RSTs are never sent in
 * reply to RSTs, nor to non-unicast destinations.
 */
static void tcp_v6_send_reset(struct sk_buff *skb)
{
	struct tcphdr *th = skb->h.th, *t1; 
	struct sk_buff *buff;
	struct flowi fl;

	if (th->rst)
		return;

	if (!ipv6_unicast_destination(skb))
		return; 

	/*
	 * We need to grab some memory, and put together an RST,
	 * and then put it into the queue to be sent.
	 */

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
			 GFP_ATOMIC);
	if (buff == NULL) 
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));

	t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = sizeof(*t1)/4;
	t1->rst = 1;
  
	/* Per RFC 793: echo the peer's ACK as our SEQ when present,
	 * otherwise ACK exactly the data the peer sent. */
	if(th->ack) {
		t1->seq = th->ack_seq;
	} else {
		t1->ack = 1;
		t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
				    + skb->len - (th->doff<<2));
	}

	buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);

	memset(&fl, 0, sizeof(fl));
	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
				    sizeof(*t1), IPPROTO_TCP,
				    buff->csum);

	fl.proto = IPPROTO_TCP;
	fl.oif = inet6_iif(skb);
	fl.fl_ip_dport = t1->dest;
	fl.fl_ip_sport = t1->source;
	security_skb_classify_flow(skb, &fl);

	/* sk = NULL, but it is safe for now. RST socket required. */
	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {

		if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
			ip6_xmit(tcp6_socket->sk, buff, &fl, NULL, 0);
			TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
			TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
			return;
		}
	}

	/* Routing or policy lookup failed: drop the reply. */
	kfree_skb(buff);
}
647
/*
 * Send a bare ACK (no socket context) in reply to @skb, carrying
 * sequence @seq, acknowledgment @ack and window @win.  A non-zero
 * @ts appends a timestamp option echoing that value.  Used for
 * time-wait and request-sock ACKs via the module's tcp6_socket.
 */
static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
{
	struct tcphdr *th = skb->h.th, *t1;
	struct sk_buff *buff;
	struct flowi fl;
	int tot_len = sizeof(struct tcphdr);

	/* Timestamp option: NOP NOP TIMESTAMP len val ecr = 12 bytes. */
	if (ts)
		tot_len += 3*4;

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
			 GFP_ATOMIC);
	if (buff == NULL)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

	t1 = (struct tcphdr *) skb_push(buff,tot_len);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len/4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = 1;
	t1->window = htons(win);
	
	if (ts) {
		u32 *ptr = (u32*)(t1 + 1);
		*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
			       (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*ptr++ = htonl(tcp_time_stamp);
		*ptr = htonl(ts);
	}

	buff->csum = csum_partial((char *)t1, tot_len, 0);

	memset(&fl, 0, sizeof(fl));
	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
				    tot_len, IPPROTO_TCP,
				    buff->csum);

	fl.proto = IPPROTO_TCP;
	fl.oif = inet6_iif(skb);
	fl.fl_ip_dport = t1->dest;
	fl.fl_ip_sport = t1->source;
	security_skb_classify_flow(skb, &fl);

	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
		if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
			ip6_xmit(tcp6_socket->sk, buff, &fl, NULL, 0);
			TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
			return;
		}
	}

	/* Routing or policy lookup failed: drop the reply. */
	kfree_skb(buff);
}
711
712 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
713 {
714         struct inet_timewait_sock *tw = inet_twsk(sk);
715         const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
716
717         tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
718                         tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
719                         tcptw->tw_ts_recent);
720
721         inet_twsk_put(tw);
722 }
723
724 static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
725 {
726         tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
727 }
728
729
/*
 * Demultiplex a segment that arrived on listening socket @sk.
 *
 * Returns the socket that should process @skb: a pending request is
 * handed to tcp_check_req(); an already-established child is returned
 * bh-locked; a time-wait hit yields NULL (caller handles tw); and
 * otherwise the listener itself is returned.
 */
static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
{
	struct request_sock *req, **prev;
	const struct tcphdr *th = skb->h.th;
	struct sock *nsk;

	/* Find possible connection requests. */
	req = inet6_csk_search_req(sk, &prev, th->source,
				   &skb->nh.ipv6h->saddr,
				   &skb->nh.ipv6h->daddr, inet6_iif(skb));
	if (req)
		return tcp_check_req(sk, skb, req, prev);

	nsk = __inet6_lookup_established(&tcp_hashinfo, &skb->nh.ipv6h->saddr,
					 th->source, &skb->nh.ipv6h->daddr,
					 ntohs(th->dest), inet6_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			/* Returned locked; caller must unlock. */
			bh_lock_sock(nsk);
			return nsk;
		}
		inet_twsk_put((struct inet_timewait_sock *)nsk);
		return NULL;
	}

/* SYN-cookie support for IPv6 is compiled out here. */
#if 0 /*def CONFIG_SYN_COOKIES*/
	if (!th->rst && !th->syn && th->ack)
		sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}
762
763 /* FIXME: this is substantially similar to the ipv4 code.
764  * Can some kind of merge be done? -- erics
765  */
/*
 * Handle an incoming SYN on listening socket @sk: allocate and queue
 * a request_sock, parse the client's TCP options, capture packet
 * options the application asked to receive, choose the ISN and send
 * the SYN|ACK.  v4-mapped SYNs are delegated to tcp_v4_conn_request().
 *
 * Always returns 0 (a dropped SYN must not trigger a reset).
 */
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct inet6_request_sock *treq;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_options_received tmp_opt;
	struct tcp_sock *tp = tcp_sk(sk);
	struct request_sock *req = NULL;
	__u32 isn = TCP_SKB_CB(skb)->when;

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop; 

	/*
	 *	There are no SYN attacks on IPv6, yet...	
	 */
	/* Accept queue full and no cookie ISN available: drop. */
	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
		if (net_ratelimit())
			printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
		goto drop;		
	}

	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;

	req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
	if (req == NULL)
		goto drop;

	tcp_clear_options(&tmp_opt);
	/* Clamp MSS so replies fit the IPv6 minimum MTU. */
	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
	tmp_opt.user_mss = tp->rx_opt.user_mss;

	tcp_parse_options(skb, &tmp_opt, 0);

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb);

	treq = inet6_rsk(req);
	ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
	TCP_ECN_create_request(req, skb->h.th);
	treq->pktopts = NULL;
	/* Keep the SYN skb alive if the app wants its packet options
	 * (hoplimit, pktinfo, ...) delivered with the accepted socket. */
	if (ipv6_opt_accepted(sk, skb) ||
	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
	    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
		atomic_inc(&skb->users);
		treq->pktopts = skb;
	}
	treq->iif = sk->sk_bound_dev_if;

	/* So that link locals have meaning */
	if (!sk->sk_bound_dev_if &&
	    ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
		treq->iif = inet6_iif(skb);

	if (isn == 0) 
		isn = tcp_v6_init_sequence(sk,skb);

	tcp_rsk(req)->snt_isn = isn;

	if (tcp_v6_send_synack(sk, req, NULL))
		goto drop;

	inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
	return 0;

drop:
	if (req)
		reqsk_free(req);

	return 0; /* don't send reset */
}
841
/*
 * Create the child socket for a completed three-way handshake.
 *
 * @sk:  the listening socket that received the original SYN
 * @skb: the final ACK (or, for v6-mapped, an IPv4 packet)
 * @req: the pending connection request being promoted
 * @dst: cached route for the peer, or NULL to look one up here
 *
 * Returns the new connected socket, or NULL on failure (the caller
 * then drops the segment; no reset is generated from here).
 */
static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
                                          struct request_sock *req,
                                          struct dst_entry *dst)
{
        struct inet6_request_sock *treq = inet6_rsk(req);
        struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
        struct tcp6_sock *newtcp6sk;
        struct inet_sock *newinet;
        struct tcp_sock *newtp;
        struct sock *newsk;
        struct ipv6_txoptions *opt;

        if (skb->protocol == htons(ETH_P_IP)) {
                /*
                 *      v6 mapped: an IPv4 SYN arrived on a dual-stack
                 *      (AF_INET6) listener.  Let the IPv4 code build the
                 *      child, then graft IPv6 bookkeeping onto it.
                 */

                newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);

                if (newsk == NULL)
                        return NULL;

                newtcp6sk = (struct tcp6_sock *)newsk;
                inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

                newinet = inet_sk(newsk);
                newnp = inet6_sk(newsk);
                newtp = tcp_sk(newsk);

                /* Inherit the listener's IPv6 state wholesale, then
                 * overwrite the per-connection fields below. */
                memcpy(newnp, np, sizeof(struct ipv6_pinfo));

                /* Record the peer/local addresses in ::ffff:a.b.c.d
                 * (IPv4-mapped) form. */
                ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
                              newinet->daddr);

                ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
                              newinet->saddr);

                ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);

                /* From now on this child behaves as TCP-over-IPv4. */
                inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
                newsk->sk_backlog_rcv = tcp_v4_do_rcv;
                newnp->pktoptions  = NULL;
                newnp->opt         = NULL;
                newnp->mcast_oif   = inet6_iif(skb);
                newnp->mcast_hops  = skb->nh.ipv6h->hop_limit;

                /*
                 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
                 * here, tcp_create_openreq_child now does this for us, see the comment in
                 * that function for the gory details. -acme
                 */

                /* It is tricky place. Until this moment IPv4 tcp
                   worked with IPv6 icsk.icsk_af_ops.
                   Sync it now.
                 */
                tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

                return newsk;
        }

        opt = np->opt;

        if (sk_acceptq_is_full(sk))
                goto out_overflow;

        /* If the listener asked for it and has no explicit source route,
         * derive one by inverting the routing header seen in the SYN.
         * Note: ipv6_invert_rthdr may allocate; the opt != np->opt
         * comparisons below detect that and free it on all exits. */
        if (np->rxopt.bits.osrcrt == 2 &&
            opt == NULL && treq->pktopts) {
                struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
                if (rxopt->srcrt)
                        opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
        }

        if (dst == NULL) {
                /* No cached route: build a flow and look one up.  With a
                 * source route, route to the first hop but remember the
                 * final destination in 'final'. */
                struct in6_addr *final_p = NULL, final;
                struct flowi fl;

                memset(&fl, 0, sizeof(fl));
                fl.proto = IPPROTO_TCP;
                ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
                if (opt && opt->srcrt) {
                        struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
                        ipv6_addr_copy(&final, &fl.fl6_dst);
                        ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
                        final_p = &final;
                }
                ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
                fl.oif = sk->sk_bound_dev_if;
                fl.fl_ip_dport = inet_rsk(req)->rmt_port;
                fl.fl_ip_sport = inet_sk(sk)->sport;
                security_sk_classify_flow(sk, &fl);

                if (ip6_dst_lookup(sk, &dst, &fl))
                        goto out;

                if (final_p)
                        ipv6_addr_copy(&fl.fl6_dst, final_p);

                if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
                        goto out;
        }

        newsk = tcp_create_openreq_child(sk, req, skb);
        if (newsk == NULL)
                goto out;

        /*
         * No need to charge this sock to the relevant IPv6 refcnt debug socks
         * count here, tcp_create_openreq_child now does this for us, see the
         * comment in that function for the gory details. -acme
         */

        newsk->sk_gso_type = SKB_GSO_TCPV6;
        __ip6_dst_store(newsk, dst, NULL);

        newtcp6sk = (struct tcp6_sock *)newsk;
        inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

        newtp = tcp_sk(newsk);
        newinet = inet_sk(newsk);
        newnp = inet6_sk(newsk);

        memcpy(newnp, np, sizeof(struct ipv6_pinfo));

        ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
        ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
        ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
        newsk->sk_bound_dev_if = treq->iif;

        /* Now IPv6 options...

           First: no IPv4 options.
         */
        newinet->opt = NULL;

        /* Clone RX bits */
        newnp->rxopt.all = np->rxopt.all;

        /* Clone pktoptions received with SYN.  Ownership of the original
           SYN skb reference moves from the request sock to here; it is
           released unconditionally. */
        newnp->pktoptions = NULL;
        if (treq->pktopts != NULL) {
                newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
                kfree_skb(treq->pktopts);
                treq->pktopts = NULL;
                if (newnp->pktoptions)
                        skb_set_owner_r(newnp->pktoptions, newsk);
        }
        newnp->opt        = NULL;
        newnp->mcast_oif  = inet6_iif(skb);
        newnp->mcast_hops = skb->nh.ipv6h->hop_limit;

        /* Clone native IPv6 options from listening socket (if any)

           Yes, keeping reference count would be much more clever,
           but we make one more one thing there: reattach optmem
           to newsk.
         */
        if (opt) {
                newnp->opt = ipv6_dup_options(newsk, opt);
                /* If opt was allocated by ipv6_invert_rthdr above
                   (rather than borrowed from np), free it now. */
                if (opt != np->opt)
                        sock_kfree_s(sk, opt, opt->tot_len);
        }

        inet_csk(newsk)->icsk_ext_hdr_len = 0;
        if (newnp->opt)
                inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
                                                     newnp->opt->opt_flen);

        tcp_mtup_init(newsk);
        tcp_sync_mss(newsk, dst_mtu(dst));
        newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
        tcp_initialize_rcv_mss(newsk);

        /* Mark the IPv4 address fields as unusable for a native v6 sock. */
        newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;

        __inet6_hash(&tcp_hashinfo, newsk);
        inet_inherit_port(&tcp_hashinfo, sk, newsk);

        return newsk;

out_overflow:
        NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
out:
        NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
        if (opt && opt != np->opt)
                sock_kfree_s(sk, opt, opt->tot_len);
        dst_release(dst);
        return NULL;
}
1031
1032 static int tcp_v6_checksum_init(struct sk_buff *skb)
1033 {
1034         if (skb->ip_summed == CHECKSUM_HW) {
1035                 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1036                                   &skb->nh.ipv6h->daddr,skb->csum)) {
1037                         skb->ip_summed = CHECKSUM_UNNECESSARY;
1038                         return 0;
1039                 }
1040         }
1041
1042         skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1043                                   &skb->nh.ipv6h->daddr, 0);
1044
1045         if (skb->len <= 76) {
1046                 return __skb_checksum_complete(skb);
1047         }
1048         return 0;
1049 }
1050
/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 *
 * Per-socket receive path: dispatches the segment according to
 * sk_state, handles IPV6_PKTOPTIONS latching, and sends a reset
 * when the state machine demands one.  Always returns 0; the skb
 * is consumed on every path.
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct tcp_sock *tp;
        struct sk_buff *opt_skb = NULL;

        /* Imagine: socket is IPv6. IPv4 packet arrives,
           goes to IPv4 receive handler and backlogged.
           From backlog it always goes here. Kerboom...
           Fortunately, tcp_rcv_established and rcv_established
           handle them correctly, but it is not case with
           tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
         */

        if (skb->protocol == htons(ETH_P_IP))
                return tcp_v4_do_rcv(sk, skb);

        if (sk_filter(sk, skb, 0))
                goto discard;

        /*
         *      socket locking is here for SMP purposes as backlog rcv
         *      is currently called with bh processing disabled.
         */

        /* Do Stevens' IPV6_PKTOPTIONS.

           Yes, guys, it is the only place in our code, where we
           may make it not affecting IPv4.
           The rest of code is protocol independent,
           and I do not like idea to uglify IPv4.

           Actually, all the idea behind IPV6_PKTOPTIONS
           looks not very well thought. For now we latch
           options, received in the last packet, enqueued
           by tcp. Feel free to propose better solution.
                                               --ANK (980728)
         */
        if (np->rxopt.all)
                opt_skb = skb_clone(skb, GFP_ATOMIC);

        if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
                TCP_CHECK_TIMER(sk);
                if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
                        goto reset;
                TCP_CHECK_TIMER(sk);
                if (opt_skb)
                        goto ipv6_pktoptions;
                return 0;
        }

        /* Non-established states: verify header length and checksum
           before feeding the segment to the state machine. */
        if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
                goto csum_err;

        if (sk->sk_state == TCP_LISTEN) {
                struct sock *nsk = tcp_v6_hnd_req(sk, skb);
                if (!nsk)
                        goto discard;

                /*
                 * Queue it on the new socket if the new socket is active,
                 * otherwise we just shortcircuit this and continue with
                 * the new socket..
                 */
                if(nsk != sk) {
                        if (tcp_child_process(sk, nsk, skb))
                                goto reset;
                        if (opt_skb)
                                __kfree_skb(opt_skb);
                        return 0;
                }
        }

        TCP_CHECK_TIMER(sk);
        if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
                goto reset;
        TCP_CHECK_TIMER(sk);
        if (opt_skb)
                goto ipv6_pktoptions;
        return 0;

reset:
        tcp_v6_send_reset(skb);
discard:
        if (opt_skb)
                __kfree_skb(opt_skb);
        kfree_skb(skb);
        return 0;
csum_err:
        TCP_INC_STATS_BH(TCP_MIB_INERRS);
        goto discard;


ipv6_pktoptions:
        /* Do you ask, what is it?

           1. skb was enqueued by tcp.
           2. skb is added to tail of read queue, rather than out of order.
           3. socket is not in passive state.
           4. Finally, it really contains options, which user wants to receive.
         */
        tp = tcp_sk(sk);
        if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
            !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
                if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
                        np->mcast_oif = inet6_iif(opt_skb);
                if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
                        np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
                if (ipv6_opt_accepted(sk, opt_skb)) {
                        /* Latch this skb as the current pktoptions; the
                           previously latched one (if any) falls out of
                           the xchg and is freed below. */
                        skb_set_owner_r(opt_skb, sk);
                        opt_skb = xchg(&np->pktoptions, opt_skb);
                } else {
                        __kfree_skb(opt_skb);
                        opt_skb = xchg(&np->pktoptions, NULL);
                }
        }

        if (opt_skb)
                kfree_skb(opt_skb);
        return 0;
}
1180
/*
 * Main receive entry point for IPv6 TCP (registered in tcpv6_protocol).
 * Validates the header and checksum, fills in the TCP control block,
 * looks up the owning socket and delivers the segment — directly,
 * via prequeue, or onto the socket backlog — handling the no-socket
 * and TIME-WAIT cases explicitly.
 */
static int tcp_v6_rcv(struct sk_buff **pskb)
{
        struct sk_buff *skb = *pskb;
        struct tcphdr *th;
        struct sock *sk;
        int ret;

        if (skb->pkt_type != PACKET_HOST)
                goto discard_it;

        /*
         *      Count it even if it's bad.
         */
        TCP_INC_STATS_BH(TCP_MIB_INSEGS);

        if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
                goto discard_it;

        th = skb->h.th;

        /* doff is in 32-bit words; anything smaller than the base
           header is malformed. */
        if (th->doff < sizeof(struct tcphdr)/4)
                goto bad_packet;
        if (!pskb_may_pull(skb, th->doff*4))
                goto discard_it;

        if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
             tcp_v6_checksum_init(skb)))
                goto bad_packet;

        /* Re-read th: pskb_may_pull may have reallocated the header. */
        th = skb->h.th;
        TCP_SKB_CB(skb)->seq = ntohl(th->seq);
        TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
                                    skb->len - th->doff*4);
        TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
        TCP_SKB_CB(skb)->when = 0;
        TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
        TCP_SKB_CB(skb)->sacked = 0;

        sk = __inet6_lookup(&tcp_hashinfo, &skb->nh.ipv6h->saddr, th->source,
                            &skb->nh.ipv6h->daddr, ntohs(th->dest),
                            inet6_iif(skb));

        if (!sk)
                goto no_tcp_socket;

process:
        if (sk->sk_state == TCP_TIME_WAIT)
                goto do_time_wait;

        if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
                goto discard_and_relse;

        if (sk_filter(sk, skb, 0))
                goto discard_and_relse;

        skb->dev = NULL;

        bh_lock_sock(sk);
        ret = 0;
        if (!sock_owned_by_user(sk)) {
#ifdef CONFIG_NET_DMA
                struct tcp_sock *tp = tcp_sk(sk);
                if (tp->ucopy.dma_chan)
                        ret = tcp_v6_do_rcv(sk, skb);
                else
#endif
                {
                        if (!tcp_prequeue(sk, skb))
                                ret = tcp_v6_do_rcv(sk, skb);
                }
        } else
                /* Socket is locked by a user context: defer to backlog. */
                sk_add_backlog(sk, skb);
        bh_unlock_sock(sk);

        sock_put(sk);
        return ret ? -1 : 0;

no_tcp_socket:
        if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
                goto discard_it;

        if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
bad_packet:
                TCP_INC_STATS_BH(TCP_MIB_INERRS);
        } else {
                /* Valid segment, no listener/connection: answer with RST. */
                tcp_v6_send_reset(skb);
        }

discard_it:

        /*
         *      Discard frame
         */

        kfree_skb(skb);
        return 0;

discard_and_relse:
        sock_put(sk);
        goto discard_it;

do_time_wait:
        if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
                inet_twsk_put((struct inet_timewait_sock *)sk);
                goto discard_it;
        }

        if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
                TCP_INC_STATS_BH(TCP_MIB_INERRS);
                inet_twsk_put((struct inet_timewait_sock *)sk);
                goto discard_it;
        }

        switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk,
                                           skb, th)) {
        case TCP_TW_SYN:
        {
                /* A new SYN hit a TIME-WAIT socket; if a listener exists
                   for the address, retire the timewait sock and restart
                   processing against the listener. */
                struct sock *sk2;

                sk2 = inet6_lookup_listener(&tcp_hashinfo,
                                            &skb->nh.ipv6h->daddr,
                                            ntohs(th->dest), inet6_iif(skb));
                if (sk2 != NULL) {
                        struct inet_timewait_sock *tw = inet_twsk(sk);
                        inet_twsk_deschedule(tw, &tcp_death_row);
                        inet_twsk_put(tw);
                        sk = sk2;
                        goto process;
                }
                /* Fall through to ACK */
        }
        case TCP_TW_ACK:
                tcp_v6_timewait_ack(sk, skb);
                break;
        case TCP_TW_RST:
                goto no_tcp_socket;
        case TCP_TW_SUCCESS:;
        }
        goto discard_it;
}
1321
/*
 * Hook for remembering peer timestamp state (see the .remember_stamp
 * op in ipv6_specific).  Not implemented for IPv6; always reports
 * that nothing was stored.
 */
static int tcp_v6_remember_stamp(struct sock *sk)
{
        /* Alas, not yet... */
        return 0;
}
1327
/*
 * Address-family specific operations for native IPv6 TCP sockets,
 * installed as icsk_af_ops in tcp_v6_init_sock().
 */
static struct inet_connection_sock_af_ops ipv6_specific = {
        .queue_xmit        = inet6_csk_xmit,
        .send_check        = tcp_v6_send_check,
        .rebuild_header    = inet6_sk_rebuild_header,
        .conn_request      = tcp_v6_conn_request,
        .syn_recv_sock     = tcp_v6_syn_recv_sock,
        .remember_stamp    = tcp_v6_remember_stamp,
        .net_header_len    = sizeof(struct ipv6hdr),
        .setsockopt        = ipv6_setsockopt,
        .getsockopt        = ipv6_getsockopt,
        .addr2sockaddr     = inet6_csk_addr2sockaddr,
        .sockaddr_len      = sizeof(struct sockaddr_in6),
#ifdef CONFIG_COMPAT
        .compat_setsockopt = compat_ipv6_setsockopt,
        .compat_getsockopt = compat_ipv6_getsockopt,
#endif
};
1345
/*
 *      TCP over IPv4 via INET6 API
 *
 * Installed on a child socket by tcp_v6_syn_recv_sock() when an IPv4
 * SYN arrives on a dual-stack listener: transmission and header
 * handling use the IPv4 paths, while sockopt handling stays IPv6.
 */

static struct inet_connection_sock_af_ops ipv6_mapped = {
        .queue_xmit        = ip_queue_xmit,
        .send_check        = tcp_v4_send_check,
        .rebuild_header    = inet_sk_rebuild_header,
        .conn_request      = tcp_v6_conn_request,
        .syn_recv_sock     = tcp_v6_syn_recv_sock,
        .remember_stamp    = tcp_v4_remember_stamp,
        .net_header_len    = sizeof(struct iphdr),
        .setsockopt        = ipv6_setsockopt,
        .getsockopt        = ipv6_getsockopt,
        .addr2sockaddr     = inet6_csk_addr2sockaddr,
        .sockaddr_len      = sizeof(struct sockaddr_in6),
#ifdef CONFIG_COMPAT
        .compat_setsockopt = compat_ipv6_setsockopt,
        .compat_getsockopt = compat_ipv6_getsockopt,
#endif
};
1367
/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 *
 * Per-socket initializer (struct proto .init) for AF_INET6 TCP
 * sockets: sets up timers/queues and TCP's initial congestion and
 * RTT parameters.  Returns 0 (cannot fail).
 */
static int tcp_v6_init_sock(struct sock *sk)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct tcp_sock *tp = tcp_sk(sk);

        skb_queue_head_init(&tp->out_of_order_queue);
        tcp_init_xmit_timers(sk);
        tcp_prequeue_init(tp);

        icsk->icsk_rto = TCP_TIMEOUT_INIT;
        tp->mdev = TCP_TIMEOUT_INIT;

        /* So many TCP implementations out there (incorrectly) count the
         * initial SYN frame in their delayed-ACK and congestion control
         * algorithms that we must have the following bandaid to talk
         * efficiently to them.  -DaveM
         */
        tp->snd_cwnd = 2;

        /* See draft-stevens-tcpca-spec-01 for discussion of the
         * initialization of these values.
         */
        tp->snd_ssthresh = 0x7fffffff;
        tp->snd_cwnd_clamp = ~0;
        /* Conservative initial MSS until path MTU is discovered. */
        tp->mss_cache = 536;

        tp->reordering = sysctl_tcp_reordering;

        sk->sk_state = TCP_CLOSE;

        icsk->icsk_af_ops = &ipv6_specific;
        icsk->icsk_ca_ops = &tcp_init_congestion_ops;
        icsk->icsk_sync_mss = tcp_sync_mss;
        sk->sk_write_space = sk_stream_write_space;
        sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

        sk->sk_sndbuf = sysctl_tcp_wmem[1];
        sk->sk_rcvbuf = sysctl_tcp_rmem[1];

        atomic_inc(&tcp_sockets_allocated);

        return 0;
}
1414
/*
 * Per-socket destructor (struct proto .destroy) for AF_INET6 TCP.
 * The IPv4 destructor tears down the address-family independent TCP
 * state; inet6_destroy_sock() then releases the IPv6-specific parts
 * and supplies the return value.
 */
static int tcp_v6_destroy_sock(struct sock *sk)
{
        int rc;

        tcp_v4_destroy_sock(sk);
        rc = inet6_destroy_sock(sk);

        return rc;
}
1420
/* Proc filesystem TCPv6 sock list dumping. */

/*
 * Emit one /proc/net/tcp6 line for a pending (SYN_RECV) open request.
 * @i is the row index, @uid the owner of the listening socket.
 */
static void get_openreq6(struct seq_file *seq,
                         struct sock *sk, struct request_sock *req, int i, int uid)
{
        /* Remaining lifetime of the request, clamped to zero. */
        int ttd = req->expires - jiffies;
        struct in6_addr *src = &inet6_rsk(req)->loc_addr;
        struct in6_addr *dest = &inet6_rsk(req)->rmt_addr;

        if (ttd < 0)
                ttd = 0;

        seq_printf(seq,
                   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
                   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
                   i,
                   src->s6_addr32[0], src->s6_addr32[1],
                   src->s6_addr32[2], src->s6_addr32[3],
                   ntohs(inet_sk(sk)->sport),
                   dest->s6_addr32[0], dest->s6_addr32[1],
                   dest->s6_addr32[2], dest->s6_addr32[3],
                   ntohs(inet_rsk(req)->rmt_port),
                   TCP_SYN_RECV,
                   0,0, /* could print option size, but that is af dependent. */
                   1,   /* timers active (only the expire timer) */
                   jiffies_to_clock_t(ttd),
                   req->retrans,
                   uid,
                   0,  /* non standard timer */
                   0, /* open_requests have no inode */
                   0, req);
}
1452
/*
 * Emit one /proc/net/tcp6 line for an established or listening socket.
 * Field layout must match the header printed by tcp6_seq_show().
 */
static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
        struct in6_addr *dest, *src;
        __u16 destp, srcp;
        int timer_active;
        unsigned long timer_expires;
        struct inet_sock *inet = inet_sk(sp);
        struct tcp_sock *tp = tcp_sk(sp);
        const struct inet_connection_sock *icsk = inet_csk(sp);
        struct ipv6_pinfo *np = inet6_sk(sp);

        dest  = &np->daddr;
        src   = &np->rcv_saddr;
        destp = ntohs(inet->dport);
        srcp  = ntohs(inet->sport);

        /* Encode which timer is pending: 1 retransmit, 4 zero-window
           probe, 2 keepalive (sk_timer), 0 none. */
        if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
                timer_active    = 1;
                timer_expires   = icsk->icsk_timeout;
        } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
                timer_active    = 4;
                timer_expires   = icsk->icsk_timeout;
        } else if (timer_pending(&sp->sk_timer)) {
                timer_active    = 2;
                timer_expires   = sp->sk_timer.expires;
        } else {
                timer_active    = 0;
                timer_expires = jiffies;
        }

        seq_printf(seq,
                   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
                   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
                   i,
                   src->s6_addr32[0], src->s6_addr32[1],
                   src->s6_addr32[2], src->s6_addr32[3], srcp,
                   dest->s6_addr32[0], dest->s6_addr32[1],
                   dest->s6_addr32[2], dest->s6_addr32[3], destp,
                   sp->sk_state,
                   tp->write_seq-tp->snd_una,
                   /* rx_queue column: backlog length for listeners,
                      unread bytes otherwise. */
                   (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
                   timer_active,
                   jiffies_to_clock_t(timer_expires - jiffies),
                   icsk->icsk_retransmits,
                   sock_i_uid(sp),
                   icsk->icsk_probes_out,
                   sock_i_ino(sp),
                   atomic_read(&sp->sk_refcnt), sp,
                   icsk->icsk_rto,
                   icsk->icsk_ack.ato,
                   (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong,
                   tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
                   );
}
1507
/*
 * Emit one /proc/net/tcp6 line for a TIME-WAIT socket.  Most columns
 * are fixed zeros since a timewait sock carries no full TCP state;
 * timer_active is reported as 3 (timewait timer).
 */
static void get_timewait6_sock(struct seq_file *seq,
                               struct inet_timewait_sock *tw, int i)
{
        struct in6_addr *dest, *src;
        __u16 destp, srcp;
        struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
        /* Remaining time-wait lifetime, clamped to zero. */
        int ttd = tw->tw_ttd - jiffies;

        if (ttd < 0)
                ttd = 0;

        dest = &tw6->tw_v6_daddr;
        src  = &tw6->tw_v6_rcv_saddr;
        destp = ntohs(tw->tw_dport);
        srcp  = ntohs(tw->tw_sport);

        seq_printf(seq,
                   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
                   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
                   i,
                   src->s6_addr32[0], src->s6_addr32[1],
                   src->s6_addr32[2], src->s6_addr32[3], srcp,
                   dest->s6_addr32[0], dest->s6_addr32[1],
                   dest->s6_addr32[2], dest->s6_addr32[3], destp,
                   tw->tw_substate, 0, 0,
                   3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
                   atomic_read(&tw->tw_refcnt), tw);
}
1536
1537 #ifdef CONFIG_PROC_FS
/*
 * seq_file .show callback for /proc/net/tcp6: prints the header row
 * for the start token, otherwise dispatches to the per-state line
 * formatter according to the iterator's current phase.
 */
static int tcp6_seq_show(struct seq_file *seq, void *v)
{
        struct tcp_iter_state *st;

        if (v == SEQ_START_TOKEN) {
                seq_puts(seq,
                         "  sl  "
                         "local_address                         "
                         "remote_address                        "
                         "st tx_queue rx_queue tr tm->when retrnsmt"
                         "   uid  timeout inode\n");
                goto out;
        }
        st = seq->private;

        switch (st->state) {
        case TCP_SEQ_STATE_LISTENING:
        case TCP_SEQ_STATE_ESTABLISHED:
                get_tcp6_sock(seq, v, st->num);
                break;
        case TCP_SEQ_STATE_OPENREQ:
                get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
                break;
        case TCP_SEQ_STATE_TIME_WAIT:
                get_timewait6_sock(seq, v, st->num);
                break;
        }
out:
        return 0;
}
1568
/* tcp6_seq_fops is filled in by tcp_proc_register(). */
static struct file_operations tcp6_seq_fops;

/* Descriptor registering /proc/net/tcp6 with the shared TCP proc code. */
static struct tcp_seq_afinfo tcp6_seq_afinfo = {
        .owner          = THIS_MODULE,
        .name           = "tcp6",
        .family         = AF_INET6,
        .seq_show       = tcp6_seq_show,
        .seq_fops       = &tcp6_seq_fops,
};
1577
/* Register /proc/net/tcp6 at boot; returns 0 or a negative errno
 * from tcp_proc_register(). */
int __init tcp6_proc_init(void)
{
        return tcp_proc_register(&tcp6_seq_afinfo);
}
1582
/* Remove /proc/net/tcp6 on module teardown. */
void tcp6_proc_exit(void)
{
        tcp_proc_unregister(&tcp6_seq_afinfo);
}
1587 #endif
1588
/*
 * struct proto for AF_INET6 TCP sockets.  Most operations are shared
 * with IPv4 TCP; only init/destroy, hashing and port allocation are
 * IPv6-specific.
 */
struct proto tcpv6_prot = {
        .name                   = "TCPv6",
        .owner                  = THIS_MODULE,
        .close                  = tcp_close,
        .connect                = tcp_v6_connect,
        .disconnect             = tcp_disconnect,
        .accept                 = inet_csk_accept,
        .ioctl                  = tcp_ioctl,
        .init                   = tcp_v6_init_sock,
        .destroy                = tcp_v6_destroy_sock,
        .shutdown               = tcp_shutdown,
        .setsockopt             = tcp_setsockopt,
        .getsockopt             = tcp_getsockopt,
        .sendmsg                = tcp_sendmsg,
        .recvmsg                = tcp_recvmsg,
        .backlog_rcv            = tcp_v6_do_rcv,
        .hash                   = tcp_v6_hash,
        .unhash                 = tcp_unhash,
        .get_port               = tcp_v6_get_port,
        .enter_memory_pressure  = tcp_enter_memory_pressure,
        .sockets_allocated      = &tcp_sockets_allocated,
        .memory_allocated       = &tcp_memory_allocated,
        .memory_pressure        = &tcp_memory_pressure,
        .orphan_count           = &tcp_orphan_count,
        .sysctl_mem             = sysctl_tcp_mem,
        .sysctl_wmem            = sysctl_tcp_wmem,
        .sysctl_rmem            = sysctl_tcp_rmem,
        .max_header             = MAX_TCP_HEADER,
        .obj_size               = sizeof(struct tcp6_sock),
        .twsk_prot              = &tcp6_timewait_sock_ops,
        .rsk_prot               = &tcp6_request_sock_ops,
#ifdef CONFIG_COMPAT
        .compat_setsockopt      = compat_tcp_setsockopt,
        .compat_getsockopt      = compat_tcp_getsockopt,
#endif
};
1625
/* inet6 protocol handler for IPPROTO_TCP, registered in tcpv6_init(). */
static struct inet6_protocol tcpv6_protocol = {
        .handler        =       tcp_v6_rcv,
        .err_handler    =       tcp_v6_err,
        .gso_send_check =       tcp_v6_gso_send_check,
        .gso_segment    =       tcp_tso_segment,
        .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
1633
/* Socket-switch entry connecting SOCK_STREAM/IPPROTO_TCP on AF_INET6
 * to tcpv6_prot and the generic inet6 stream ops. */
static struct inet_protosw tcpv6_protosw = {
        .type           =       SOCK_STREAM,
        .protocol       =       IPPROTO_TCP,
        .prot           =       &tcpv6_prot,
        .ops            =       &inet6_stream_ops,
        .capability     =       -1,
        .no_check       =       0,
        .flags          =       INET_PROTOSW_PERMANENT |
                                INET_PROTOSW_ICSK,
};
1644
/*
 * Boot-time initialization for IPv6 TCP: register the protocol
 * handler and socket-switch entry, then create the kernel control
 * socket.  Panics if the control socket cannot be created.
 */
void __init tcpv6_init(void)
{
        /* register inet6 protocol */
        if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
                printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
        inet6_register_protosw(&tcpv6_protosw);

        /* Kernel-internal control socket (presumably used for emitting
           resets/ACKs without a user socket — confirm against
           tcp_v6_send_reset, which is outside this view). */
        if (inet_csk_ctl_sock_create(&tcp6_socket, PF_INET6, SOCK_RAW,
                                     IPPROTO_TCP) < 0)
                panic("Failed to create the TCPv6 control socket.\n");
}