/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 *		IPv4 specific functions
 *
 *
 *		code split from:
 *		linux/ipv4/tcp.c
 *		linux/ipv4/tcp_input.c
 *		linux/ipv4/tcp_output.c
 *
 *		See tcp.c for author information
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*
 * Changes:
 *		David S. Miller	:	New socket lookup architecture.
 *					This code is dedicated to John Dyson.
 *		David S. Miller :	Change semantics of established hash,
 *					half is devoted to TIME_WAIT sockets
 *					and the rest go in the other half.
 *		Andi Kleen :		Add support for syncookies and fixed
 *					some bugs: ip options weren't passed to
 *					the TCP layer, missed a check for an
 *					ACK bit.
 *		Andi Kleen :		Implemented fast path mtu discovery.
 *					Fixed many serious bugs in the
 *					request_sock handling and moved
 *					most of it into the af independent code.
 *					Added tail drop and some other bugfixes.
 *					Added new listen semantics.
 *		Mike McLagan	:	Routing by source
 *	Juan Jose Ciarlante:		ip_dynaddr bits
 *		Andi Kleen:		various fixes.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year
 *					coma.
 *	Andi Kleen		:	Fix new listen.
 *	Andi Kleen		:	Fix accept error reporting.
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 */

#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>

#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/netdma.h>

#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <linux/crypto.h>
#include <linux/scatterlist.h>

int sysctl_tcp_tw_reuse __read_mostly;
int sysctl_tcp_low_latency __read_mostly;

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
						   __be32 addr);
static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, struct tcphdr *th);
#else
static inline
struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
{
	return NULL;
}
#endif

struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
	.lhash_lock  = __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock),
	.lhash_users = ATOMIC_INIT(0),
	.lhash_wait  = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
};

static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
{
	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
					  ip_hdr(skb)->saddr,
					  tcp_hdr(skb)->dest,
					  tcp_hdr(skb)->source);
}

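/* Roughly speaking, secure_tcp_sequence_number() (implemented elsewhere,
 * in drivers/char/random.c in kernels of this vintage) derives the ISN
 * from a keyed hash of the connection 4-tuple plus a clock component, in
 * the spirit of RFC 1948: unpredictable to off-path attackers, yet still
 * advancing over time for a given address/port pair.
 */
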
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
	struct tcp_sock *tp = tcp_sk(sk);

	/* With PAWS, it is safe from the viewpoint
	   of data integrity. Even without PAWS it is safe provided sequence
	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.

	   Actually, the idea is close to VJ's one, only timestamp cache is
	   held not per host, but per port pair and TW bucket is used as state
	   holder.

	   If TW bucket has been already destroyed we fall back to VJ's scheme
	   and use initial timestamp retrieved from peer table.
	 */
	if (tcptw->tw_ts_recent_stamp &&
	    (twp == NULL || (sysctl_tcp_tw_reuse &&
			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
		if (tp->write_seq == 0)
			tp->write_seq = 1;
		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
		sock_hold(sktw);
		return 1;
	}

	return 0;
}

EXPORT_SYMBOL_GPL(tcp_twsk_unique);

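/* The offset chosen above starts the new connection's sequence space
 * beyond anything the old incarnation could still have in flight: one
 * full maximum window (65535) plus a small slack of 2 (presumably to
 * cover the sequence numbers consumed by SYN and FIN). For example, with
 * tw_snd_nxt == 1000 the new write_seq becomes 66537, so stray duplicate
 * segments from the old connection cannot land inside the new window.
 */
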
/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct rtable *rt;
	__be32 daddr, nexthop;
	int tmp;
	int err;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	nexthop = daddr = usin->sin_addr.s_addr;
	if (inet->opt && inet->opt->srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet->opt->faddr;
	}

	tmp = ip_route_connect(&rt, nexthop, inet->saddr,
			       RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			       IPPROTO_TCP,
			       inet->sport, usin->sin_port, sk, 1);
	if (tmp < 0) {
		if (tmp == -ENETUNREACH)
			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		return tmp;
	}

	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet->opt || !inet->opt->srr)
		daddr = rt->rt_dst;

	if (!inet->saddr)
		inet->saddr = rt->rt_src;
	inet->rcv_saddr = inet->saddr;

	if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent	   = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq		   = 0;
	}

	if (tcp_death_row.sysctl_tw_recycle &&
	    !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
		struct inet_peer *peer = rt_get_peer(rt);
		/*
		 * VJ's idea. We save last timestamp seen from
		 * the destination in peer table, when entering state
		 * TIME-WAIT, and initialize rx_opt.ts_recent from it,
		 * when trying new connection.
		 */
		if (peer != NULL &&
		    peer->tcp_ts_stamp + TCP_PAWS_MSL >= get_seconds()) {
			tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
			tp->rx_opt.ts_recent = peer->tcp_ts;
		}
	}

	inet->dport = usin->sin_port;
	inet->daddr = daddr;

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet->opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;

	tp->rx_opt.mss_clamp = 536;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and, without releasing the socket
	 * lock, select a source port, enter ourselves into the hash tables
	 * and complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(&tcp_death_row, sk);
	if (err)
		goto failure;

	err = ip_route_newports(&rt, IPPROTO_TCP,
				inet->sport, inet->dport, sk);
	if (err)
		goto failure;

	/* OK, now commit destination to socket. */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->u.dst);

	if (!tp->write_seq)
		tp->write_seq = secure_tcp_sequence_number(inet->saddr,
							   inet->daddr,
							   inet->sport,
							   usin->sin_port);

	inet->id = tp->write_seq ^ jiffies;

	err = tcp_connect(sk);
	rt = NULL;
	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->dport = 0;
	return err;
}

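/* To summarize the flow above: tcp_v4_connect() resolves a route (honoring
 * any source routing option), moves the socket to SYN-SENT, lets
 * inet_hash_connect() pick an ephemeral port and hash the socket,
 * re-resolves the route with the final ports, chooses an ISN if one is
 * not inherited, and finally hands off to tcp_connect() to emit the SYN.
 */
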
/*
 * This routine does path mtu discovery as defined in RFC1191.
 */
static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
{
	struct dst_entry *dst;
	struct inet_sock *inet = inet_sk(sk);

	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
	 * sent out by Linux are always < 576 bytes, so they should go through
	 * unfragmented).
	 */
	if (sk->sk_state == TCP_LISTEN)
		return;

	/* We don't check in the destentry if pmtu discovery is forbidden
	 * on this route. We just assume that no packet-too-big packets
	 * are sent back when pmtu discovery is not active.
	 * There is a small race when the user changes this flag in the
	 * route, but I think that's acceptable.
	 */
	if ((dst = __sk_dst_check(sk, 0)) == NULL)
		return;

	dst->ops->update_pmtu(dst, mtu);

	/* Something is about to be wrong... Remember soft error
	 * for the case, if this connection is not able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
		tcp_sync_mss(sk, mtu);

		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped. This is the new "fast" path mtu
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
}

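/* As a worked example of the path above: if an ICMP_FRAG_NEEDED quotes an
 * MTU of 1400 on a route that previously carried 1500, tcp_sync_mss()
 * lowers the MSS accordingly (for a no-options IPv4+TCP header that is
 * MTU minus 40 bytes, i.e. 1460 -> 1360) and tcp_simple_retransmit()
 * resends the too-large segment immediately instead of waiting for the
 * retransmit timer.
 */
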
/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment
 * header points to the first 8 bytes of the tcp header.  We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 *
 */

void tcp_v4_err(struct sk_buff *skb, u32 info)
{
	struct iphdr *iph = (struct iphdr *)skb->data;
	struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
	struct tcp_sock *tp;
	struct inet_sock *inet;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct sock *sk;
	__u32 seq;
	int err;
	struct net *net = dev_net(skb->dev);

	if (skb->len < (iph->ihl << 2) + 8) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}

	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
			 iph->saddr, th->source, inet_iif(skb));
	if (!sk) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 */
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	switch (type) {
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			if (!sock_owned_by_user(sk))
				do_pmtu_discovery(sk, iph, info);
			goto out;
		}

		err = icmp_err_convert[code].errno;
		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = inet_csk_search_req(sk, &prev, th->dest,
					  iph->daddr, iph->saddr);
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		   an established socket here.
		 */
		WARN_ON(req->sk);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can, e.g., if SYNs crossed.
			     */
		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows us to consider as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note, that in modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit, sending random
	 * errors ordered by their masters, even these two messages finally lose
	 * their original sense (even Linux sends invalid PORT_UNREACHs)
	 *
	 * Now we are in compliance with RFCs.
	 * --ANK (980905)
	 */

	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else	{ /* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
{
	struct inet_sock *inet = inet_sk(sk);
	struct tcphdr *th = tcp_hdr(skb);

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		th->check = ~tcp_v4_check(len, inet->saddr,
					  inet->daddr, 0);
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct tcphdr, check);
	} else {
		th->check = tcp_v4_check(len, inet->saddr, inet->daddr,
					 csum_partial((char *)th,
						      th->doff << 2,
						      skb->csum));
	}
}

int tcp_v4_gso_send_check(struct sk_buff *skb)
{
	const struct iphdr *iph;
	struct tcphdr *th;

	if (!pskb_may_pull(skb, sizeof(*th)))
		return -EINVAL;

	iph = ip_hdr(skb);
	th = tcp_hdr(skb);

	th->check = 0;
	th->check = ~tcp_v4_check(skb->len, iph->saddr, iph->daddr, 0);
	skb->csum_start = skb_transport_header(skb) - skb->head;
	skb->csum_offset = offsetof(struct tcphdr, check);
	skb->ip_summed = CHECKSUM_PARTIAL;
	return 0;
}

/*
 *	This routine will send an RST to the other tcp.
 *
 *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 *		      for reset.
 *	Answer: if a packet caused an RST, it is not for a socket
 *		existing in our system; if it is matched to a socket,
 *		it is just a duplicate segment or a bug in the other
 *		side's TCP. So we build the reply based only on the
 *		parameters that arrived with the segment.
 *	Exception: precedence violation. We do not implement it in any case.
 */

static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct net *net;

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	if (skb->rtable->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rep, 0, sizeof(rep));
	rep.th.dest   = th->source;
	rep.th.source = th->dest;
	rep.th.doff   = sizeof(struct tcphdr) / 4;
	rep.th.rst    = 1;

	if (th->ack) {
		rep.th.seq = th->ack_seq;
	} else {
		rep.th.ack = 1;
		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				       skb->len - (th->doff << 2));
	}

	memset(&arg, 0, sizeof(arg));
	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);

#ifdef CONFIG_TCP_MD5SIG
	key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL;
	if (key) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
				   (TCPOPT_NOP << 16) |
				   (TCPOPT_MD5SIG << 8) |
				   TCPOLEN_MD5SIG);
		/* Update length and the length the header thinks exists */
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;

	net = dev_net(skb->dst->dev);
	ip_send_reply(net->ipv4.tcp_sock, skb,
		      &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
}

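/* For reference, the MD5 option block built above lays out on the wire as
 * NOP (0x01), NOP (0x01), kind TCPOPT_MD5SIG (19), length TCPOLEN_MD5SIG
 * (18), followed by the 16-byte digest, 20 bytes in total, which is what
 * TCPOLEN_MD5SIG_ALIGNED accounts for when the header length is bumped.
 */
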
/* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
   outside socket context, is ugly, certainly. What can I do?
 */

static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
			    u32 win, u32 ts, int oif,
			    struct tcp_md5sig_key *key,
			    int reply_flags)
{
	struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
			];
	} rep;
	struct ip_reply_arg arg;
	struct net *net = dev_net(skb->dst->dev);

	memset(&rep.th, 0, sizeof(struct tcphdr));
	memset(&arg, 0, sizeof(arg));

	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);
	if (ts) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				   (TCPOPT_TIMESTAMP << 8) |
				   TCPOLEN_TIMESTAMP);
		rep.opt[1] = htonl(tcp_time_stamp);
		rep.opt[2] = htonl(ts);
		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
	}

	/* Swap the send and the receive. */
	rep.th.dest    = th->source;
	rep.th.source  = th->dest;
	rep.th.doff    = arg.iov[0].iov_len / 4;
	rep.th.seq     = htonl(seq);
	rep.th.ack_seq = htonl(ack);
	rep.th.ack     = 1;
	rep.th.window  = htons(win);

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		int offset = (ts) ? 3 : 0;

		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
					  (TCPOPT_NOP << 16) |
					  (TCPOPT_MD5SIG << 8) |
					  TCPOLEN_MD5SIG);
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.flags = reply_flags;
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	if (oif)
		arg.bound_dev_if = oif;

	ip_send_reply(net->ipv4.tcp_sock, skb,
		      &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
}

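/* Similarly, when a timestamp is echoed the option block above is NOP,
 * NOP, kind TCPOPT_TIMESTAMP (8), length TCPOLEN_TIMESTAMP (10), then two
 * 32-bit values (our tcp_time_stamp and the echoed ts), making up the 12
 * bytes that TCPOLEN_TSTAMP_ALIGNED adds to the header.
 */
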
static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcptw->tw_ts_recent,
			tw->tw_bound_dev_if,
			tcp_twsk_md5_key(tcptw),
			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0
			);

	inet_twsk_put(tw);
}

static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
			req->ts_recent,
			0,
			tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr),
			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0);
}

/*
 *	Send a SYN-ACK after having received a SYN.
 *	This still operates on a request_sock only, not on a big
 *	socket.
 */
static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
				struct dst_entry *dst)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	int err = -1;
	struct sk_buff *skb;

	/* First, grab a route. */
	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
		return -1;

	skb = tcp_make_synack(sk, dst, req);

	if (skb) {
		struct tcphdr *th = tcp_hdr(skb);

		th->check = tcp_v4_check(skb->len,
					 ireq->loc_addr,
					 ireq->rmt_addr,
					 csum_partial((char *)th, skb->len,
						      skb->csum));

		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
					    ireq->rmt_addr,
					    ireq->opt);
		err = net_xmit_eval(err);
	}

	dst_release(dst);
	return err;
}

static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req)
{
	return __tcp_v4_send_synack(sk, req, NULL);
}

/*
 *	IPv4 request_sock destructor.
 */
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->opt);
}

#ifdef CONFIG_SYN_COOKIES
static void syn_flood_warning(struct sk_buff *skb)
{
	static unsigned long warntime;

	if (time_after(jiffies, (warntime + HZ * 60))) {
		warntime = jiffies;
		printk(KERN_INFO
		       "possible SYN flooding on port %d. Sending cookies.\n",
		       ntohs(tcp_hdr(skb)->dest));
	}
}
#endif

/*
 * Save and compile IPv4 options into the request_sock if needed.
 */
static struct ip_options *tcp_v4_save_options(struct sock *sk,
					      struct sk_buff *skb)
{
	struct ip_options *opt = &(IPCB(skb)->opt);
	struct ip_options *dopt = NULL;

	if (opt && opt->optlen) {
		int opt_size = optlength(opt);
		dopt = kmalloc(opt_size, GFP_ATOMIC);
		if (dopt) {
			if (ip_options_echo(dopt, skb)) {
				kfree(dopt);
				dopt = NULL;
			}
		}
	}
	return dopt;
}

#ifdef CONFIG_TCP_MD5SIG
/*
 * RFC2385 MD5 checksumming requires a mapping of
 * IP address->MD5 Key.
 * We need to maintain these in the sk structure.
 */

/* Find the Key structure for an address.  */
static struct tcp_md5sig_key *
			tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int i;

	if (!tp->md5sig_info || !tp->md5sig_info->entries4)
		return NULL;
	for (i = 0; i < tp->md5sig_info->entries4; i++) {
		if (tp->md5sig_info->keys4[i].addr == addr)
			return &tp->md5sig_info->keys4[i].base;
	}
	return NULL;
}

struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
					 struct sock *addr_sk)
{
	return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->daddr);
}

EXPORT_SYMBOL(tcp_v4_md5_lookup);

static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
						      struct request_sock *req)
{
	return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
}

/* This can be called on a newly created socket, from other files */
int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
		      u8 *newkey, u8 newkeylen)
{
	/* Add Key to the list */
	struct tcp_md5sig_key *key;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp4_md5sig_key *keys;

	key = tcp_v4_md5_do_lookup(sk, addr);
	if (key) {
		/* Pre-existing entry - just update that one. */
		kfree(key->key);
		key->key = newkey;
		key->keylen = newkeylen;
	} else {
		struct tcp_md5sig_info *md5sig;

		if (!tp->md5sig_info) {
			tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
						  GFP_ATOMIC);
			if (!tp->md5sig_info) {
				kfree(newkey);
				return -ENOMEM;
			}
			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
		}
		if (tcp_alloc_md5sig_pool() == NULL) {
			kfree(newkey);
			return -ENOMEM;
		}
		md5sig = tp->md5sig_info;

		if (md5sig->alloced4 == md5sig->entries4) {
			keys = kmalloc((sizeof(*keys) *
					(md5sig->entries4 + 1)), GFP_ATOMIC);
			if (!keys) {
				kfree(newkey);
				tcp_free_md5sig_pool();
				return -ENOMEM;
			}

			if (md5sig->entries4)
				memcpy(keys, md5sig->keys4,
				       sizeof(*keys) * md5sig->entries4);

			/* Free old key list, and reference new one */
			kfree(md5sig->keys4);
			md5sig->keys4 = keys;
			md5sig->alloced4++;
		}
		md5sig->entries4++;
		md5sig->keys4[md5sig->entries4 - 1].addr	= addr;
		md5sig->keys4[md5sig->entries4 - 1].base.key	= newkey;
		md5sig->keys4[md5sig->entries4 - 1].base.keylen	= newkeylen;
	}
	return 0;
}

EXPORT_SYMBOL(tcp_v4_md5_do_add);

static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
			       u8 *newkey, u8 newkeylen)
{
	return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->daddr,
				 newkey, newkeylen);
}

int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int i;

	for (i = 0; i < tp->md5sig_info->entries4; i++) {
		if (tp->md5sig_info->keys4[i].addr == addr) {
			/* Free the key */
			kfree(tp->md5sig_info->keys4[i].base.key);
			tp->md5sig_info->entries4--;

			if (tp->md5sig_info->entries4 == 0) {
				kfree(tp->md5sig_info->keys4);
				tp->md5sig_info->keys4 = NULL;
				tp->md5sig_info->alloced4 = 0;
			} else if (tp->md5sig_info->entries4 != i) {
				/* Need to do some manipulation */
				memmove(&tp->md5sig_info->keys4[i],
					&tp->md5sig_info->keys4[i+1],
					(tp->md5sig_info->entries4 - i) *
					 sizeof(struct tcp4_md5sig_key));
			}
			tcp_free_md5sig_pool();
			return 0;
		}
	}
	return -ENOENT;
}

EXPORT_SYMBOL(tcp_v4_md5_do_del);

static void tcp_v4_clear_md5_list(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* Free each key, then the set of keys,
	 * the crypto element, and then decrement our
	 * hold on the last resort crypto.
	 */
	if (tp->md5sig_info->entries4) {
		int i;
		for (i = 0; i < tp->md5sig_info->entries4; i++)
			kfree(tp->md5sig_info->keys4[i].base.key);
		tp->md5sig_info->entries4 = 0;
		tcp_free_md5sig_pool();
	}
	if (tp->md5sig_info->keys4) {
		kfree(tp->md5sig_info->keys4);
		tp->md5sig_info->keys4 = NULL;
		tp->md5sig_info->alloced4 = 0;
	}
}

static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
				 int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
	u8 *newkey;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin->sin_family != AF_INET)
		return -EINVAL;

	if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
		if (!tcp_sk(sk)->md5sig_info)
			return -ENOENT;
		return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
	}

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	if (!tcp_sk(sk)->md5sig_info) {
		struct tcp_sock *tp = tcp_sk(sk);
		struct tcp_md5sig_info *p = kzalloc(sizeof(*p), GFP_KERNEL);

		if (!p)
			return -EINVAL;

		tp->md5sig_info = p;
		sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
	}

	newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
	if (!newkey)
		return -ENOMEM;
	return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
				 newkey, cmd.tcpm_keylen);
}

static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
					__be32 daddr, __be32 saddr, int nbytes)
{
	struct tcp4_pseudohdr *bp;
	struct scatterlist sg;

	bp = &hp->md5_blk.ip4;

	/*
	 * 1. the TCP pseudo-header (in the order: source IP address,
	 * destination IP address, zero-padded protocol number, and
	 * segment length)
	 */
	bp->saddr = saddr;
	bp->daddr = daddr;
	bp->pad = 0;
	bp->protocol = IPPROTO_TCP;
	bp->len = cpu_to_be16(nbytes);

	sg_init_one(&sg, bp, sizeof(*bp));
	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
}

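/* The pseudo-header hashed above mirrors the one used for the TCP
 * checksum: 4 bytes saddr, 4 bytes daddr, 1 zero pad byte, 1 byte of
 * protocol (6 for TCP), and a 2-byte segment length, 12 bytes in all.
 */
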
static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;
	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

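/* Per RFC 2385, the digest computed here covers, in order: the
 * pseudo-header, the TCP header with its checksum field zeroed (options
 * excluded), and the key itself. This header-only variant serves the
 * RST/ACK reply paths, which carry no payload; tcp_v4_md5_hash_skb()
 * below is the full variant that also folds in the segment data.
 */
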
int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
			struct sock *sk, struct request_sock *req,
			struct sk_buff *skb)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;
	struct tcphdr *th = tcp_hdr(skb);
	__be32 saddr, daddr;

	if (sk) {
		saddr = inet_sk(sk)->saddr;
		daddr = inet_sk(sk)->daddr;
	} else if (req) {
		saddr = inet_rsk(req)->loc_addr;
		daddr = inet_rsk(req)->rmt_addr;
	} else {
		const struct iphdr *iph = ip_hdr(skb);
		saddr = iph->saddr;
		daddr = iph->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;

	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

EXPORT_SYMBOL(tcp_v4_md5_hash_skb);

static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
{
	/*
	 * This gets called for each TCP segment that arrives
	 * so we want to be efficient.
	 * We have 3 drop cases:
	 * o No MD5 hash and one expected.
	 * o MD5 hash and we're not expecting one.
	 * o MD5 hash and it's wrong.
	 */
	__u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct iphdr *iph = ip_hdr(skb);
	struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	unsigned char newhash[16];

	hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return 0;

	if (hash_expected && !hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return 1;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return 1;
	}

	/* Okay, so this is hash_expected and hash_location -
	 * so we need to calculate the checksum.
	 */
	genhash = tcp_v4_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		if (net_ratelimit()) {
			printk(KERN_INFO "MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
			       &iph->saddr, ntohs(th->source),
			       &iph->daddr, ntohs(th->dest),
			       genhash ? " tcp_v4_calc_md5_hash failed" : "");
		}
		return 1;
	}
	return 0;
}

#endif

struct request_sock_ops tcp_request_sock_ops __read_mostly = {
	.family		=	PF_INET,
	.obj_size	=	sizeof(struct tcp_request_sock),
	.rtx_syn_ack	=	tcp_v4_send_synack,
	.send_ack	=	tcp_v4_reqsk_send_ack,
	.destructor	=	tcp_v4_reqsk_destructor,
	.send_reset	=	tcp_v4_send_reset,
};

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
};
#endif

static struct timewait_sock_ops tcp_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor= tcp_twsk_destructor,
};

int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct inet_request_sock *ireq;
	struct tcp_options_received tmp_opt;
	struct request_sock *req;
	__be32 saddr = ip_hdr(skb)->saddr;
	__be32 daddr = ip_hdr(skb)->daddr;
	__u32 isn = TCP_SKB_CB(skb)->when;
	struct dst_entry *dst = NULL;
#ifdef CONFIG_SYN_COOKIES
	int want_cookie = 0;
#else
#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
#endif

	/* Never answer to SYNs sent to broadcast or multicast */
	if (skb->rtable->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	/* TW buckets are converted to open requests without
	 * limitations, they conserve resources and peer is
	 * evidently real one.
	 */
	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
#ifdef CONFIG_SYN_COOKIES
		if (sysctl_tcp_syncookies) {
			want_cookie = 1;
		} else
#endif
		goto drop;
	}

	/* Accept backlog is full. If we have already queued enough
	 * of warm entries in syn queue, drop request. It is better than
	 * clogging syn queue with openreqs with exponentially increasing
	 * timeout.
	 */
	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;

	req = inet_reqsk_alloc(&tcp_request_sock_ops);
	if (!req)
		goto drop;

#ifdef CONFIG_TCP_MD5SIG
	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
#endif

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = 536;
	tmp_opt.user_mss  = tcp_sk(sk)->rx_opt.user_mss;

	tcp_parse_options(skb, &tmp_opt, 0);

	if (want_cookie && !tmp_opt.saw_tstamp)
		tcp_clear_options(&tmp_opt);

	if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) {
		/* Some OSes (unknown ones, but I see them on web server, which
		 * contains information interesting only for windows'
		 * users) do not send their stamp in SYN. It is easy case.
		 * We simply do not advertise TS support.
		 */
		tmp_opt.saw_tstamp = 0;
		tmp_opt.tstamp_ok  = 0;
	}
	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;

	tcp_openreq_init(req, &tmp_opt, skb);

	if (security_inet_conn_request(sk, skb, req))
		goto drop_and_free;

	ireq = inet_rsk(req);
	ireq->loc_addr = daddr;
	ireq->rmt_addr = saddr;
	ireq->no_srccheck = inet_sk(sk)->transparent;
	ireq->opt = tcp_v4_save_options(sk, skb);
	if (!want_cookie)
		TCP_ECN_create_request(req, tcp_hdr(skb));

	if (want_cookie) {
#ifdef CONFIG_SYN_COOKIES
		syn_flood_warning(skb);
		req->cookie_ts = tmp_opt.tstamp_ok;
#endif
		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
	} else if (!isn) {
		struct inet_peer *peer = NULL;

		/* VJ's idea. We save last timestamp seen
		 * from the destination in peer table, when entering
		 * state TIME-WAIT, and check against it before
		 * accepting new connection request.
		 *
		 * If "isn" is not zero, this request hit alive
		 * timewait bucket, so that all the necessary checks
		 * are made in the function processing timewait state.
		 */
		if (tmp_opt.saw_tstamp &&
		    tcp_death_row.sysctl_tw_recycle &&
		    (dst = inet_csk_route_req(sk, req)) != NULL &&
		    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
		    peer->v4daddr == saddr) {
			if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
			    (s32)(peer->tcp_ts - req->ts_recent) >
							TCP_PAWS_WINDOW) {
				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
				goto drop_and_release;
			}
		}
		/* Kill the following clause, if you dislike this way. */
		else if (!sysctl_tcp_syncookies &&
			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
			  (sysctl_max_syn_backlog >> 2)) &&
			 (!peer || !peer->tcp_ts_stamp) &&
			 (!dst || !dst_metric(dst, RTAX_RTT))) {
			/* Without syncookies last quarter of
			 * backlog is filled with destinations,
			 * proven to be alive.
			 * It means that we continue to communicate
			 * to destinations, already remembered
			 * to the moment of synflood.
			 */
			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n",
				       &saddr, ntohs(tcp_hdr(skb)->source));
			goto drop_and_release;
		}

		isn = tcp_v4_init_sequence(skb);
	}
	tcp_rsk(req)->snt_isn = isn;

	if (__tcp_v4_send_synack(sk, req, dst) || want_cookie)
		goto drop_and_free;

	inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
	return 0;

drop_and_release:
	dst_release(dst);
drop_and_free:
	reqsk_free(req);
drop:
	return 0;
}

/*
 * The three way handshake has completed - we got a valid synack -
 * now create the new socket.
 */
struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct dst_entry *dst)
{
	struct inet_request_sock *ireq;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
		goto exit;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit;

	newsk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(newsk, dst);

	newtp		      = tcp_sk(newsk);
	newinet		      = inet_sk(newsk);
	ireq		      = inet_rsk(req);
	newinet->daddr	      = ireq->rmt_addr;
	newinet->rcv_saddr    = ireq->loc_addr;
	newinet->saddr	      = ireq->loc_addr;
	newinet->opt	      = ireq->opt;
	ireq->opt	      = NULL;
	newinet->mc_index     = inet_iif(skb);
	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (newinet->opt)
		inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
	newinet->id = newtp->write_seq ^ jiffies;

	tcp_mtup_init(newsk);
	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
	if (tcp_sk(sk)->rx_opt.user_mss &&
	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;

	tcp_initialize_rcv_mss(newsk);

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	if ((key = tcp_v4_md5_do_lookup(sk, newinet->daddr)) != NULL) {
		/*
		 * We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
		if (newkey != NULL)
			tcp_v4_md5_do_add(newsk, inet_sk(sk)->daddr,
					  newkey, key->keylen);
		newsk->sk_route_caps &= ~NETIF_F_GSO_MASK;
	}
#endif

	__inet_hash_nolisten(newsk);
	__inet_inherit_port(sk, newsk);

	return newsk;

exit_overflow:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
	dst_release(dst);
	return NULL;
}

static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = tcp_hdr(skb);
	const struct iphdr *iph = ip_hdr(skb);
	struct sock *nsk;
	struct request_sock **prev;
	/* Find possible connection requests. */
	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
						       iph->saddr, iph->daddr);
	if (req)
		return tcp_check_req(sk, skb, req, prev);

	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
			th->source, iph->daddr, th->dest, inet_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		inet_twsk_put(inet_twsk(nsk));
		return NULL;
	}

#ifdef CONFIG_SYN_COOKIES
	if (!th->rst && !th->syn && th->ack)
		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}

static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
{
	const struct iphdr *iph = ip_hdr(skb);

	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		if (!tcp_v4_check(skb->len, iph->saddr,
				  iph->daddr, skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			return 0;
		}
	}

	skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
				       skb->len, IPPROTO_TCP, 0);

	if (skb->len <= 76) {
		return __skb_checksum_complete(skb);
	}
	return 0;
}

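/* Checksum strategy above: if the driver already summed the payload
 * (CHECKSUM_COMPLETE), fold in the pseudo-header and verify right away.
 * Otherwise only seed skb->csum with the pseudo-header sum and defer
 * full verification; tiny packets (<= 76 bytes) are cheap enough to
 * verify immediately via __skb_checksum_complete().
 */
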
/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct sock *rsk;
#ifdef CONFIG_TCP_MD5SIG
	/*
	 * We really want to reject the packet as early as possible
	 * if:
	 *  o We're expecting an MD5'd packet and this is no MD5 tcp option
	 *  o There is an MD5 option and we're not expecting one
	 */
	if (tcp_v4_inbound_md5_hash(sk, skb))
		goto discard;
#endif

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		TCP_CHECK_TIMER(sk);
		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
			rsk = sk;
			goto reset;
		}
		TCP_CHECK_TIMER(sk);
		return 0;
	}

	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb)) {
				rsk = nsk;
				goto reset;
			}
			return 0;
		}
	}

	TCP_CHECK_TIMER(sk);
	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
		rsk = sk;
		goto reset;
	}
	TCP_CHECK_TIMER(sk);
	return 0;

reset:
	tcp_v4_send_reset(rsk, skb);
discard:
	kfree_skb(skb);
	/* Be careful here. If this function gets more complicated and
	 * gcc suffers from register pressure on the x86, sk (in %ebx)
	 * might be destroyed here. This current version compiles correctly,
	 * but you have been warned.
	 */
	return 0;

csum_err:
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
	goto discard;
}

/*
 *	From tcp_input.c
 */

int tcp_v4_rcv(struct sk_buff *skb)
{
	const struct iphdr *iph;
	struct tcphdr *th;
	struct sock *sk;
	int ret;
	struct net *net = dev_net(skb->dev);

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/* Count it even if it's bad */
	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff * 4))
		goto discard_it;

	/* An explanation is required here, I think.
	 * Packet length and doff are validated by header prediction,
	 * provided case of th->doff==0 is eliminated.
	 * So, we defer the checks. */
	if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
		goto bad_packet;

	th = tcp_hdr(skb);
	iph = ip_hdr(skb);
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when	 = 0;
	TCP_SKB_CB(skb)->flags	 = iph->tos;
	TCP_SKB_CB(skb)->sacked	 = 0;

	sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;
	nf_reset(skb);

	if (sk_filter(sk, skb))
		goto discard_and_relse;

	skb->dev = NULL;

	bh_lock_sock_nested(sk);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
#ifdef CONFIG_NET_DMA
		struct tcp_sock *tp = tcp_sk(sk);
		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
			tp->ucopy.dma_chan = get_softnet_dma();
		if (tp->ucopy.dma_chan)
			ret = tcp_v4_do_rcv(sk, skb);
		else
#endif
		{
			if (!tcp_prequeue(sk, skb))
				ret = tcp_v4_do_rcv(sk, skb);
		}
	} else
		sk_add_backlog(sk, skb);
	bh_unlock_sock(sk);

	sock_put(sk);

	return ret;

no_tcp_socket:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
bad_packet:
		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
	} else {
		tcp_v4_send_reset(NULL, skb);
	}

discard_it:
	/* Discard frame. */
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}
	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN: {
		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
							&tcp_hashinfo,
							iph->daddr, th->dest,
							inet_iif(skb));
		if (sk2) {
			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
			inet_twsk_put(inet_twsk(sk));
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v4_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}

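/* Demux order in tcp_v4_rcv(), for orientation: validate the header and
 * checksum, then look the segment up in the established/listening hashes
 * via __inet_lookup_skb(). TIME-WAIT sockets get special handling above,
 * where a fresh SYN may be re-dispatched to a current listener; anything
 * that matches no socket at all earns a reset.
 */
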
/* VJ's idea. Save last timestamp seen from this destination
 * and hold it at least for normal timewait interval to use for duplicate
 * segment detection in subsequent connections, before they enter synchronized
 * state.
 */

int tcp_v4_remember_stamp(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
	struct inet_peer *peer = NULL;
	int release_it = 0;

	if (!rt || rt->rt_dst != inet->daddr) {
		peer = inet_getpeer(inet->daddr, 1);
		release_it = 1;
	} else {
		if (!rt->peer)
			rt_bind_peer(rt, 1);
		peer = rt->peer;
	}

	if (peer) {
		if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() &&
		     peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) {
			peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp;
			peer->tcp_ts = tp->rx_opt.ts_recent;
		}
		if (release_it)
			inet_putpeer(peer);
		return 1;
	}

	return 0;
}

int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
{
	struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);

	if (peer) {
		const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);

		if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() &&
		     peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) {
			peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp;
			peer->tcp_ts	   = tcptw->tw_ts_recent;
		}
		inet_putpeer(peer);
		return 1;
	}

	return 0;
}

struct inet_connection_sock_af_ops ipv4_specific = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.conn_request	   = tcp_v4_conn_request,
	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
	.remember_stamp	   = tcp_v4_remember_stamp,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ip_setsockopt,
	.getsockopt	   = ip_getsockopt,
	.addr2sockaddr	   = inet_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in),
	.bind_conflict	   = inet_csk_bind_conflict,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ip_setsockopt,
	.compat_getsockopt = compat_ip_getsockopt,
#endif
};

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
	.md5_add	= tcp_v4_md5_add_func,
	.md5_parse	= tcp_v4_parse_md5_keys,
};
#endif

1da177e4
LT
1758/* NOTE: A lot of things set to zero explicitly by call to
1759 * sk_alloc() so need not be done here.
1760 */
1761static int tcp_v4_init_sock(struct sock *sk)
1762{
6687e988 1763 struct inet_connection_sock *icsk = inet_csk(sk);
1da177e4
LT
1764 struct tcp_sock *tp = tcp_sk(sk);
1765
1766 skb_queue_head_init(&tp->out_of_order_queue);
1767 tcp_init_xmit_timers(sk);
1768 tcp_prequeue_init(tp);
1769
6687e988 1770 icsk->icsk_rto = TCP_TIMEOUT_INIT;
1da177e4
LT
1771 tp->mdev = TCP_TIMEOUT_INIT;
1772
1773 /* So many TCP implementations out there (incorrectly) count the
1774 * initial SYN frame in their delayed-ACK and congestion control
1775 * algorithms that we must have the following bandaid to talk
1776 * efficiently to them. -DaveM
1777 */
1778 tp->snd_cwnd = 2;
1779
1780 /* See draft-stevens-tcpca-spec-01 for discussion of the
1781 * initialization of these values.
1782 */
1783 tp->snd_ssthresh = 0x7fffffff; /* Infinity */
1784 tp->snd_cwnd_clamp = ~0;
c1b4a7e6 1785 tp->mss_cache = 536;
1da177e4
LT
1786
1787 tp->reordering = sysctl_tcp_reordering;
6687e988 1788 icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1da177e4
LT
1789
1790 sk->sk_state = TCP_CLOSE;
1791
1792 sk->sk_write_space = sk_stream_write_space;
1793 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1794
8292a17a 1795 icsk->icsk_af_ops = &ipv4_specific;
d83d8461 1796 icsk->icsk_sync_mss = tcp_sync_mss;
cfb6eeb4
YH
1797#ifdef CONFIG_TCP_MD5SIG
1798 tp->af_specific = &tcp_sock_ipv4_specific;
1799#endif
1da177e4
LT
1800
1801 sk->sk_sndbuf = sysctl_tcp_wmem[1];
1802 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1803
1804 atomic_inc(&tcp_sockets_allocated);
1805
1806 return 0;
1807}
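
/*
 * Illustrative aside (a sketch, not kernel code): tcp_v4_init_sock()
 * runs when a TCP socket is created and seeds sk_sndbuf/sk_rcvbuf from
 * tcp_wmem[1]/tcp_rmem[1].  A runnable userspace probe of the resulting
 * default, compiled out here; the printed value varies with sysctl
 * configuration:
 */
#if 0
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, 0);	/* invokes .init */
	int sndbuf;
	socklen_t len = sizeof(sndbuf);

	if (fd < 0)
		return 1;
	/* SO_SNDBUF reports the per-socket send buffer seeded above. */
	if (getsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf, &len) == 0)
		printf("default SO_SNDBUF: %d\n", sndbuf);
	close(fd);
	return 0;
}
#endif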

void tcp_v4_destroy_sock(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	tcp_clear_xmit_timers(sk);

	tcp_cleanup_congestion_control(sk);

	/* Clean up the write buffer. */
	tcp_write_queue_purge(sk);

	/* Cleans up our, hopefully empty, out_of_order_queue. */
	__skb_queue_purge(&tp->out_of_order_queue);

#ifdef CONFIG_TCP_MD5SIG
	/* Clean up the MD5 key list, if any */
	if (tp->md5sig_info) {
		tcp_v4_clear_md5_list(sk);
		kfree(tp->md5sig_info);
		tp->md5sig_info = NULL;
	}
#endif

#ifdef CONFIG_NET_DMA
	/* Cleans up our sk_async_wait_queue */
	__skb_queue_purge(&sk->sk_async_wait_queue);
#endif

	/* Clean up the prequeue; it really must be empty. */
	__skb_queue_purge(&tp->ucopy.prequeue);

	/* Clean up a referenced TCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash)
		inet_put_port(sk);

	/*
	 * If a sendmsg cached page exists, toss it.
	 */
	if (sk->sk_sndmsg_page) {
		__free_page(sk->sk_sndmsg_page);
		sk->sk_sndmsg_page = NULL;
	}

	atomic_dec(&tcp_sockets_allocated);
}

EXPORT_SYMBOL(tcp_v4_destroy_sock);

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCP sock list dumping. */

static inline struct inet_timewait_sock *tw_head(struct hlist_head *head)
{
	return hlist_empty(head) ? NULL :
	       list_entry(head->first, struct inet_timewait_sock, tw_node);
}

static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
{
	return tw->tw_node.next ?
	       hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
}
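
/*
 * Illustrative aside (a sketch, not kernel code): tw_head()/tw_next()
 * walk an hlist of link nodes embedded in inet_timewait_sock, recovering
 * the containing object the way hlist_entry()/container_of() do.  A
 * stripped-down userspace model with hypothetical types:
 */
#if 0
#include <stddef.h>
#include <stdio.h>

/* Userspace stand-in for the kernel's container_of(). */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct hnode {
	struct hnode *next;
};

struct toy_tw_sock {
	int port;
	struct hnode node;	/* embedded link, like tw->tw_node */
};

static struct toy_tw_sock *tw_of(struct hnode *n)
{
	return container_of(n, struct toy_tw_sock, node);
}

int main(void)
{
	struct toy_tw_sock b = { 443, { NULL } };
	struct toy_tw_sock a = { 80, { &b.node } };
	struct hnode *n;

	for (n = &a.node; n; n = n->next)	/* NULL ends the chain */
		printf("port %d\n", tw_of(n)->port);
	return 0;
}
#endif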

static void *listening_get_next(struct seq_file *seq, void *cur)
{
	struct inet_connection_sock *icsk;
	struct hlist_node *node;
	struct sock *sk = cur;
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);

	if (!sk) {
		st->bucket = 0;
		sk = sk_head(&tcp_hashinfo.listening_hash[0]);
		goto get_sk;
	}

	++st->num;

	if (st->state == TCP_SEQ_STATE_OPENREQ) {
		struct request_sock *req = cur;

		icsk = inet_csk(st->syn_wait_sk);
		req = req->dl_next;
		while (1) {
			while (req) {
				if (req->rsk_ops->family == st->family) {
					cur = req;
					goto out;
				}
				req = req->dl_next;
			}
			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
				break;
get_req:
			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
		}
		sk = sk_next(st->syn_wait_sk);
		st->state = TCP_SEQ_STATE_LISTENING;
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
	} else {
		icsk = inet_csk(sk);
		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		if (reqsk_queue_len(&icsk->icsk_accept_queue))
			goto start_req;
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		sk = sk_next(sk);
	}
get_sk:
	sk_for_each_from(sk, node) {
		if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) {
			cur = sk;
			goto out;
		}
		icsk = inet_csk(sk);
		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
start_req:
			st->uid = sock_i_uid(sk);
			st->syn_wait_sk = sk;
			st->state = TCP_SEQ_STATE_OPENREQ;
			st->sbucket = 0;
			goto get_req;
		}
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
	}
	if (++st->bucket < INET_LHTABLE_SIZE) {
		sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]);
		goto get_sk;
	}
	cur = NULL;
out:
	return cur;
}

static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
{
	void *rc = listening_get_next(seq, NULL);

	while (rc && *pos) {
		rc = listening_get_next(seq, rc);
		--*pos;
	}
	return rc;
}

static inline int empty_bucket(struct tcp_iter_state *st)
{
	return hlist_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
	       hlist_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
}

static void *established_get_first(struct seq_file *seq)
{
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);
	void *rc = NULL;

	for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
		struct sock *sk;
		struct hlist_node *node;
		struct inet_timewait_sock *tw;
		rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);

		/* Lockless fast path for the common case of empty buckets */
		if (empty_bucket(st))
			continue;

		read_lock_bh(lock);
		sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
			if (sk->sk_family != st->family ||
			    !net_eq(sock_net(sk), net)) {
				continue;
			}
			rc = sk;
			goto out;
		}
		st->state = TCP_SEQ_STATE_TIME_WAIT;
		inet_twsk_for_each(tw, node,
				   &tcp_hashinfo.ehash[st->bucket].twchain) {
			if (tw->tw_family != st->family ||
			    !net_eq(twsk_net(tw), net)) {
				continue;
			}
			rc = tw;
			goto out;
		}
		read_unlock_bh(lock);
		st->state = TCP_SEQ_STATE_ESTABLISHED;
	}
out:
	return rc;
}

static void *established_get_next(struct seq_file *seq, void *cur)
{
	struct sock *sk = cur;
	struct inet_timewait_sock *tw;
	struct hlist_node *node;
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);

	++st->num;

	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
		tw = cur;
		tw = tw_next(tw);
get_tw:
		while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
			tw = tw_next(tw);
		}
		if (tw) {
			cur = tw;
			goto out;
		}
		read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		st->state = TCP_SEQ_STATE_ESTABLISHED;

		/* Look for the next non-empty bucket */
		while (++st->bucket < tcp_hashinfo.ehash_size &&
		       empty_bucket(st))
			;
		if (st->bucket >= tcp_hashinfo.ehash_size)
			return NULL;

		read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
	} else
		sk = sk_next(sk);

	sk_for_each_from(sk, node) {
		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
			goto found;
	}

	st->state = TCP_SEQ_STATE_TIME_WAIT;
	tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
	goto get_tw;
found:
	cur = sk;
out:
	return cur;
}

static void *established_get_idx(struct seq_file *seq, loff_t pos)
{
	void *rc = established_get_first(seq);

	while (rc && pos) {
		rc = established_get_next(seq, rc);
		--pos;
	}
	return rc;
}

static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
{
	void *rc;
	struct tcp_iter_state *st = seq->private;

	inet_listen_lock(&tcp_hashinfo);
	st->state = TCP_SEQ_STATE_LISTENING;
	rc = listening_get_idx(seq, &pos);

	if (!rc) {
		inet_listen_unlock(&tcp_hashinfo);
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		rc = established_get_idx(seq, pos);
	}

	return rc;
}

static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	st->state = TCP_SEQ_STATE_LISTENING;
	st->num = 0;
	return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}

static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	void *rc = NULL;
	struct tcp_iter_state *st;

	if (v == SEQ_START_TOKEN) {
		rc = tcp_get_idx(seq, 0);
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
	case TCP_SEQ_STATE_LISTENING:
		rc = listening_get_next(seq, v);
		if (!rc) {
			inet_listen_unlock(&tcp_hashinfo);
			st->state = TCP_SEQ_STATE_ESTABLISHED;
			rc = established_get_first(seq);
		}
		break;
	case TCP_SEQ_STATE_ESTABLISHED:
	case TCP_SEQ_STATE_TIME_WAIT:
		rc = established_get_next(seq, v);
		break;
	}
out:
	++*pos;
	return rc;
}

static void tcp_seq_stop(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
		if (v) {
			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		}
	case TCP_SEQ_STATE_LISTENING:
		if (v != SEQ_START_TOKEN)
			inet_listen_unlock(&tcp_hashinfo);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
	case TCP_SEQ_STATE_ESTABLISHED:
		if (v)
			read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		break;
	}
}
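
/*
 * Illustrative aside (a sketch, not kernel code): tcp_seq_start/next/
 * stop/show follow the seq_file iterator contract -- start() positions
 * the cursor at *pos, next() advances it and bumps *pos, stop() drops
 * any locks, and show() formats one record.  A toy userspace rendering
 * of that loop; the toy_* names are hypothetical:
 */
#if 0
#include <stdio.h>

static const char *table[] = { "LISTEN", "ESTABLISHED", "TIME_WAIT" };
#define TABLE_LEN ((long)(sizeof(table) / sizeof(table[0])))

static const char *toy_start(long *pos)
{
	return *pos < TABLE_LEN ? table[*pos] : NULL;	/* cursor at *pos */
}

static const char *toy_next(long *pos)
{
	++*pos;			/* advance and bump the position */
	return toy_start(pos);
}

int main(void)
{
	long pos = 0;
	const char *v;

	/* The loop the seq_file machinery runs for each read(). */
	for (v = toy_start(&pos); v; v = toy_next(&pos))
		printf("%s\n", v);	/* the show() step */
	/* stop() would release locks here; the toy holds none. */
	return 0;
}
#endif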

static int tcp_seq_open(struct inode *inode, struct file *file)
{
	struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
	struct tcp_iter_state *s;
	int err;

	err = seq_open_net(inode, file, &afinfo->seq_ops,
			   sizeof(struct tcp_iter_state));
	if (err < 0)
		return err;

	s = ((struct seq_file *)file->private_data)->private;
	s->family = afinfo->family;
	return 0;
}

int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
{
	int rc = 0;
	struct proc_dir_entry *p;

	afinfo->seq_fops.open		= tcp_seq_open;
	afinfo->seq_fops.read		= seq_read;
	afinfo->seq_fops.llseek		= seq_lseek;
	afinfo->seq_fops.release	= seq_release_net;

	afinfo->seq_ops.start		= tcp_seq_start;
	afinfo->seq_ops.next		= tcp_seq_next;
	afinfo->seq_ops.stop		= tcp_seq_stop;

	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
			     &afinfo->seq_fops, afinfo);
	if (!p)
		rc = -ENOMEM;
	return rc;
}

void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
{
	proc_net_remove(net, afinfo->name);
}

static void get_openreq4(struct sock *sk, struct request_sock *req,
			 struct seq_file *f, int i, int uid, int *len)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	int ttd = req->expires - jiffies;

	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
		   " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p%n",
		   i,
		   ireq->loc_addr,
		   ntohs(inet_sk(sk)->sport),
		   ireq->rmt_addr,
		   ntohs(ireq->rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,    /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->retrans,
		   uid,
		   0,  /* non standard timer */
		   0,  /* open_requests have no inode */
		   atomic_read(&sk->sk_refcnt),
		   req,
		   len);
}

static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
{
	int timer_active;
	unsigned long timer_expires;
	struct tcp_sock *tp = tcp_sk(sk);
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	__be32 dest = inet->daddr;
	__be32 src = inet->rcv_saddr;
	__u16 destp = ntohs(inet->dport);
	__u16 srcp = ntohs(inet->sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sk->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sk->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires	= jiffies;
	}

	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
		   "%08X %5d %8d %lu %d %p %lu %lu %u %u %d%n",
		   i, src, srcp, dest, destp, sk->sk_state,
		   tp->write_seq - tp->snd_una,
		   sk->sk_state == TCP_LISTEN ? sk->sk_ack_backlog :
						(tp->rcv_nxt - tp->copied_seq),
		   timer_active,
		   jiffies_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   sock_i_uid(sk),
		   icsk->icsk_probes_out,
		   sock_i_ino(sk),
		   atomic_read(&sk->sk_refcnt), sk,
		   jiffies_to_clock_t(icsk->icsk_rto),
		   jiffies_to_clock_t(icsk->icsk_ack.ato),
		   (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
		   tp->snd_cwnd,
		   tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh,
		   len);
}

static void get_timewait4_sock(struct inet_timewait_sock *tw,
			       struct seq_file *f, int i, int *len)
{
	__be32 dest, src;
	__u16 destp, srcp;
	int ttd = tw->tw_ttd - jiffies;

	if (ttd < 0)
		ttd = 0;

	dest  = tw->tw_daddr;
	src   = tw->tw_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
		   " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p%n",
		   i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
		   3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
		   atomic_read(&tw->tw_refcnt), tw, len);
}

#define TMPSZ 150

static int tcp4_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	int len;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "%-*s\n", TMPSZ - 1,
			   "  sl  local_address rem_address   st tx_queue "
			   "rx_queue tr tm->when retrnsmt   uid  timeout "
			   "inode");
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		get_tcp4_sock(v, seq, st->num, &len);
		break;
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
		get_timewait4_sock(v, seq, st->num, &len);
		break;
	}
	seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
out:
	return 0;
}
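
/*
 * Illustrative aside (a sketch, not kernel code): tcp4_seq_show() pads
 * each record to TMPSZ - 1 columns, and the %08X:%04X address fields are
 * the raw __be32 address and port printed in host byte order.  A runnable
 * userspace sketch that parses one such line back into dotted-quad form;
 * the sample line is hypothetical and a little-endian host is assumed:
 */
#if 0
#include <stdio.h>

int main(void)
{
	/* A plausible /proc/net/tcp record, truncated after 'st'. */
	const char *line = "   0: 0100007F:0016 00000000:0000 0A";
	unsigned int addr, port, st;

	if (sscanf(line, " %*d: %8X:%4X %*8X:%*4X %2X",
		   &addr, &port, &st) != 3)
		return 1;
	/* 0x0100007F is 127.0.0.1: the __be32 bytes read as a host u32. */
	printf("%u.%u.%u.%u:%u state %02X\n",
	       addr & 0xff, (addr >> 8) & 0xff,
	       (addr >> 16) & 0xff, (addr >> 24) & 0xff,
	       port, st);
	return 0;
}
#endif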

static struct tcp_seq_afinfo tcp4_seq_afinfo = {
	.name		= "tcp",
	.family		= AF_INET,
	.seq_fops	= {
		.owner		= THIS_MODULE,
	},
	.seq_ops	= {
		.show		= tcp4_seq_show,
	},
};

static int tcp4_proc_init_net(struct net *net)
{
	return tcp_proc_register(net, &tcp4_seq_afinfo);
}

static void tcp4_proc_exit_net(struct net *net)
{
	tcp_proc_unregister(net, &tcp4_seq_afinfo);
}

static struct pernet_operations tcp4_net_ops = {
	.init = tcp4_proc_init_net,
	.exit = tcp4_proc_exit_net,
};

int __init tcp4_proc_init(void)
{
	return register_pernet_subsys(&tcp4_net_ops);
}

void tcp4_proc_exit(void)
{
	unregister_pernet_subsys(&tcp4_net_ops);
}
#endif /* CONFIG_PROC_FS */

struct proto tcp_prot = {
	.name			= "TCP",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v4_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v4_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.recvmsg		= tcp_recvmsg,
	.backlog_rcv		= tcp_v4_do_rcv,
	.hash			= inet_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.orphan_count		= &tcp_orphan_count,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp_sock),
	.twsk_prot		= &tcp_timewait_sock_ops,
	.rsk_prot		= &tcp_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
};

static int __net_init tcp_sk_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv4.tcp_sock,
				    PF_INET, SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcp_sk_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv4.tcp_sock);
	inet_twsk_purge(net, &tcp_hashinfo, &tcp_death_row, AF_INET);
}

static struct pernet_operations __net_initdata tcp_sk_ops = {
	.init = tcp_sk_init,
	.exit = tcp_sk_exit,
};

void __init tcp_v4_init(void)
{
	if (register_pernet_device(&tcp_sk_ops))
		panic("Failed to create the TCP control socket.\n");
}

EXPORT_SYMBOL(ipv4_specific);
EXPORT_SYMBOL(tcp_hashinfo);
EXPORT_SYMBOL(tcp_prot);
EXPORT_SYMBOL(tcp_v4_conn_request);
EXPORT_SYMBOL(tcp_v4_connect);
EXPORT_SYMBOL(tcp_v4_do_rcv);
EXPORT_SYMBOL(tcp_v4_remember_stamp);
EXPORT_SYMBOL(tcp_v4_send_check);
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);

#ifdef CONFIG_PROC_FS
EXPORT_SYMBOL(tcp_proc_register);
EXPORT_SYMBOL(tcp_proc_unregister);
#endif
EXPORT_SYMBOL(sysctl_tcp_low_latency);