/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 * Version:	$Id: tcp_ipv4.c,v 1.240 2002/02/01 22:01:04 davem Exp $
 *
 *		IPv4 specific functions
 *
 *
 *		code split from:
 *		linux/ipv4/tcp.c
 *		linux/ipv4/tcp_input.c
 *		linux/ipv4/tcp_output.c
 *
 *		See tcp.c for author information
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*
 * Changes:
 *	David S. Miller	:	New socket lookup architecture.
 *				This code is dedicated to John Dyson.
 *	David S. Miller :	Change semantics of established hash,
 *				half is devoted to TIME_WAIT sockets
 *				and the rest go in the other half.
 *	Andi Kleen :		Add support for syncookies and fixed
 *				some bugs: ip options weren't passed to
 *				the TCP layer, missed a check for an
 *				ACK bit.
 *	Andi Kleen :		Implemented fast path mtu discovery.
 *				Fixed many serious bugs in the
 *				request_sock handling and moved
 *				most of it into the af independent code.
 *				Added tail drop and some other bugfixes.
 *				Added new listen semantics.
 *	Mike McLagan	:	Routing by source
 *	Juan Jose Ciarlante:	ip_dynaddr bits
 *	Andi Kleen:		various fixes.
 *	Vitaly E. Lavrov :	Transparent proxy revived after year
 *				coma.
 *	Andi Kleen :		Fix new listen.
 *	Andi Kleen :		Fix accept error reporting.
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option,
 *	Alexey Kuznetsov		which allows both IPv4 and IPv6 sockets
 *					to bind a single port at the same time.
 */

#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>

#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/netdma.h>

#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <linux/crypto.h>
#include <linux/scatterlist.h>

int sysctl_tcp_tw_reuse __read_mostly;
int sysctl_tcp_low_latency __read_mostly;

/* Check TCP sequence numbers in ICMP packets. */
#define ICMP_MIN_LENGTH 8

/* Socket used for sending RSTs */
static struct socket *tcp_socket;

void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
						   __be32 addr);
static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
				   __be32 saddr, __be32 daddr,
				   struct tcphdr *th, int protocol,
				   int tcplen);
#endif

struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
	.lhash_lock  = __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock),
	.lhash_users = ATOMIC_INIT(0),
	.lhash_wait  = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
};

static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
{
	return inet_csk_get_port(&tcp_hashinfo, sk, snum,
				 inet_csk_bind_conflict);
}

static void tcp_v4_hash(struct sock *sk)
{
	inet_hash(&tcp_hashinfo, sk);
}

void tcp_unhash(struct sock *sk)
{
	inet_unhash(&tcp_hashinfo, sk);
}

static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
{
	return secure_tcp_sequence_number(skb->nh.iph->daddr,
					  skb->nh.iph->saddr,
					  skb->h.th->dest,
					  skb->h.th->source);
}

int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
	struct tcp_sock *tp = tcp_sk(sk);

	/* With PAWS, it is safe from the viewpoint
	   of data integrity. Even without PAWS it is safe provided sequence
	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.

	   Actually, the idea is close to VJ's one, only timestamp cache is
	   held not per host, but per port pair and TW bucket is used as state
	   holder.

	   If TW bucket has been already destroyed we fall back to VJ's scheme
	   and use initial timestamp retrieved from peer table.
	 */
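	/* A note on the offset below (my reading, not from the original
	 * authors): 65535 + 2 pushes the new connection's first sequence
	 * number a full maximum window past tw_snd_nxt, so segments of the
	 * new incarnation cannot be mistaken for late duplicates from the
	 * old one even when PAWS timestamps are unavailable.
	 */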
	if (tcptw->tw_ts_recent_stamp &&
	    (twp == NULL || (sysctl_tcp_tw_reuse &&
			     xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
		if (tp->write_seq == 0)
			tp->write_seq = 1;
		tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
		sock_hold(sktw);
		return 1;
	}

	return 0;
}

EXPORT_SYMBOL_GPL(tcp_twsk_unique);

/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct rtable *rt;
	__be32 daddr, nexthop;
	int tmp;
	int err;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	nexthop = daddr = usin->sin_addr.s_addr;
	if (inet->opt && inet->opt->srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet->opt->faddr;
	}

	tmp = ip_route_connect(&rt, nexthop, inet->saddr,
			       RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			       IPPROTO_TCP,
			       inet->sport, usin->sin_port, sk);
	if (tmp < 0)
		return tmp;

	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet->opt || !inet->opt->srr)
		daddr = rt->rt_dst;

	if (!inet->saddr)
		inet->saddr = rt->rt_src;
	inet->rcv_saddr = inet->saddr;

	if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq = 0;
	}

	if (tcp_death_row.sysctl_tw_recycle &&
	    !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
		struct inet_peer *peer = rt_get_peer(rt);
		/*
		 * VJ's idea. We save last timestamp seen from
		 * the destination in peer table, when entering state
		 * TIME-WAIT and initialize rx_opt.ts_recent from it,
		 * when trying new connection.
		 */
		if (peer != NULL &&
		    peer->tcp_ts_stamp + TCP_PAWS_MSL >= xtime.tv_sec) {
			tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
			tp->rx_opt.ts_recent = peer->tcp_ts;
		}
	}

	inet->dport = usin->sin_port;
	inet->daddr = daddr;

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet->opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;

	tp->rx_opt.mss_clamp = 536;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and, without releasing the socket
	 * lock, select a source port, enter ourselves into the hash tables
	 * and complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(&tcp_death_row, sk);
	if (err)
		goto failure;

	err = ip_route_newports(&rt, IPPROTO_TCP,
				inet->sport, inet->dport, sk);
	if (err)
		goto failure;

	/* OK, now commit destination to socket. */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->u.dst);

	if (!tp->write_seq)
		tp->write_seq = secure_tcp_sequence_number(inet->saddr,
							   inet->daddr,
							   inet->sport,
							   usin->sin_port);

	inet->id = tp->write_seq ^ jiffies;

	err = tcp_connect(sk);
	rt = NULL;
	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->dport = 0;
	return err;
}

/*
 * This routine does path mtu discovery as defined in RFC1191.
 */
static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
{
	struct dst_entry *dst;
	struct inet_sock *inet = inet_sk(sk);

	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
	 * sent out by Linux are always < 576 bytes, so they should go through
	 * unfragmented).
	 */
	if (sk->sk_state == TCP_LISTEN)
		return;

	/* We don't check in the destentry if pmtu discovery is forbidden
	 * on this route. We just assume that no packet-too-big packets
	 * are sent back when pmtu discovery is not active.
	 * There is a small race when the user changes this flag in the
	 * route, but I think that's acceptable.
	 */
	if ((dst = __sk_dst_check(sk, 0)) == NULL)
		return;

	dst->ops->update_pmtu(dst, mtu);

	/* Something is about to be wrong... Remember soft error
	 * for the case, if this connection will not be able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
		tcp_sync_mss(sk, mtu);

		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped. This is the new "fast" path mtu
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
}

/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment
 * header points to the first 8 bytes of the tcp header.  We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 *
 */

void tcp_v4_err(struct sk_buff *skb, u32 info)
{
	struct iphdr *iph = (struct iphdr *)skb->data;
	struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
	struct tcp_sock *tp;
	struct inet_sock *inet;
	int type = skb->h.icmph->type;
	int code = skb->h.icmph->code;
	struct sock *sk;
	__u32 seq;
	int err;

	if (skb->len < (iph->ihl << 2) + 8) {
		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
		return;
	}

	sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr,
			 th->source, inet_iif(skb));
	if (!sk) {
		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
		return;
	}
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 */
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	switch (type) {
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			if (!sock_owned_by_user(sk))
				do_pmtu_discovery(sk, iph, info);
			goto out;
		}

		err = icmp_err_convert[code].errno;
		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = inet_csk_search_req(sk, &prev, th->dest,
					  iph->daddr, iph->saddr);
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		   an established socket here.
		 */
		BUG_TRAP(!req->sk);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can f.e. if SYNs crossed.
			     */
		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows to consider as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note, that in modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit, sending random
	 * errors ordered by their masters, even these two messages finally
	 * lose their original sense (even Linux sends invalid PORT_UNREACHs)
	 *
	 * Now we are in compliance with RFCs.
	 * --ANK (980905)
	 */

	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else	{ /* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

/* This routine computes an IPv4 TCP checksum. */
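/* A brief sketch of the two branches below, as I read them: with
 * CHECKSUM_PARTIAL the device finishes the checksum, so we only seed
 * th->check with the pseudo-header sum and record in skb->csum where the
 * hardware must deposit the result; otherwise the checksum over header
 * and payload is computed entirely in software right here.
 */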
void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
{
	struct inet_sock *inet = inet_sk(sk);
	struct tcphdr *th = skb->h.th;

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		th->check = ~tcp_v4_check(th, len,
					  inet->saddr, inet->daddr, 0);
		skb->csum = offsetof(struct tcphdr, check);
	} else {
		th->check = tcp_v4_check(th, len, inet->saddr, inet->daddr,
					 csum_partial((char *)th,
						      th->doff << 2,
						      skb->csum));
	}
}

int tcp_v4_gso_send_check(struct sk_buff *skb)
{
	struct iphdr *iph;
	struct tcphdr *th;

	if (!pskb_may_pull(skb, sizeof(*th)))
		return -EINVAL;

	iph = skb->nh.iph;
	th = skb->h.th;

	th->check = 0;
	th->check = ~tcp_v4_check(th, skb->len, iph->saddr, iph->daddr, 0);
	skb->csum = offsetof(struct tcphdr, check);
	skb->ip_summed = CHECKSUM_PARTIAL;
	return 0;
}

/*
 *	This routine will send an RST to the other tcp.
 *
 *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 *		      for reset.
 *	Answer: if a packet caused RST, it is not for a socket
 *		existing in our system, if it is matched to a socket,
 *		it is just duplicate segment or bug in other side's TCP.
 *		So we build the reply based only on parameters
 *		arrived with the segment.
 *	Exception: precedence violation. We do not implement it in any case.
 */

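/*
 * On the sequence numbers chosen below (my reading of RFC 793, not part
 * of the original comment): if the offending segment carried an ACK, the
 * RST is sent with seq = that ack_seq and needs no ACK bit of its own;
 * otherwise seq is 0, the ACK bit is set, and ack_seq accounts for every
 * octet the segment occupied (SYN and FIN each count for one), so the
 * peer can match the RST against what it transmitted.
 */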
static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = skb->h.th;
	struct {
		struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	if (((struct rtable *)skb->dst)->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rep, 0, sizeof(rep));
	rep.th.dest = th->source;
	rep.th.source = th->dest;
	rep.th.doff = sizeof(struct tcphdr) / 4;
	rep.th.rst = 1;

	if (th->ack) {
		rep.th.seq = th->ack_seq;
	} else {
		rep.th.ack = 1;
		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				       skb->len - (th->doff << 2));
	}

	memset(&arg, 0, sizeof(arg));
	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);

#ifdef CONFIG_TCP_MD5SIG
	key = sk ? tcp_v4_md5_do_lookup(sk, skb->nh.iph->daddr) : NULL;
	if (key) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
				   (TCPOPT_NOP << 16) |
				   (TCPOPT_MD5SIG << 8) |
				   TCPOLEN_MD5SIG);
		/* Update length and the length the header thinks exists */
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[1],
					key,
					skb->nh.iph->daddr,
					skb->nh.iph->saddr,
					&rep.th, IPPROTO_TCP,
					arg.iov[0].iov_len);
	}
#endif
	arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
				      skb->nh.iph->saddr, /* XXX */
				      sizeof(struct tcphdr), IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;

	ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
	TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
}

/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
   outside socket context is ugly, certainly. What can I do?
 */

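/*
 * Worth spelling out, I think: both the RST above and the ACKs below go
 * out via ip_send_reply() on the global tcp_socket control socket,
 * precisely because there may be no full socket to transmit from; every
 * field in the reply is derived from the incoming skb alone.
 */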
static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
			    struct sk_buff *skb, u32 seq, u32 ack,
			    u32 win, u32 ts)
{
	struct tcphdr *th = skb->h.th;
	struct {
		struct tcphdr th;
		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
			];
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
	struct tcp_md5sig_key tw_key;
#endif

	memset(&rep.th, 0, sizeof(struct tcphdr));
	memset(&arg, 0, sizeof(arg));

	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);
	if (ts) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				   (TCPOPT_TIMESTAMP << 8) |
				   TCPOLEN_TIMESTAMP);
		rep.opt[1] = htonl(tcp_time_stamp);
		rep.opt[2] = htonl(ts);
		arg.iov[0].iov_len = TCPOLEN_TSTAMP_ALIGNED;
	}

	/* Swap the send and the receive. */
	rep.th.dest    = th->source;
	rep.th.source  = th->dest;
	rep.th.doff    = arg.iov[0].iov_len / 4;
	rep.th.seq     = htonl(seq);
	rep.th.ack_seq = htonl(ack);
	rep.th.ack     = 1;
	rep.th.window  = htons(win);

#ifdef CONFIG_TCP_MD5SIG
	/*
	 * The SKB holds an incoming packet, but may not have a valid ->sk
	 * pointer. This is especially the case when we're dealing with a
	 * TIME_WAIT ack, because the sk structure is long gone, and only
	 * the tcp_timewait_sock remains. So the md5 key is stashed in that
	 * structure, and we use it in preference. I believe that (twsk ||
	 * skb->sk) holds true, but we program defensively.
	 */
	if (!twsk && skb->sk) {
		key = tcp_v4_md5_do_lookup(skb->sk, skb->nh.iph->daddr);
	} else if (twsk && twsk->tw_md5_keylen) {
		tw_key.key = twsk->tw_md5_key;
		tw_key.keylen = twsk->tw_md5_keylen;
		key = &tw_key;
	} else
		key = NULL;

	if (key) {
		int offset = (ts) ? 3 : 0;

		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
					  (TCPOPT_NOP << 16) |
					  (TCPOPT_MD5SIG << 8) |
					  TCPOLEN_MD5SIG);
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[offset],
					key,
					skb->nh.iph->daddr,
					skb->nh.iph->saddr,
					&rep.th, IPPROTO_TCP,
					arg.iov[0].iov_len);
	}
#endif
	arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
				      skb->nh.iph->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;

	ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
}

static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v4_send_ack(tcptw, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcptw->tw_ts_recent);

	inet_twsk_put(tw);
}

static void tcp_v4_reqsk_send_ack(struct sk_buff *skb,
				  struct request_sock *req)
{
	tcp_v4_send_ack(NULL, skb, tcp_rsk(req)->snt_isn + 1,
			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
			req->ts_recent);
}

/*
 *	Send a SYN-ACK after having received a SYN.
 *	This still operates on a request_sock only, not on a big
 *	socket.
 */
static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
			      struct dst_entry *dst)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	int err = -1;
	struct sk_buff *skb;

	/* First, grab a route. */
	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
		goto out;

	skb = tcp_make_synack(sk, dst, req);

	if (skb) {
		struct tcphdr *th = skb->h.th;

		th->check = tcp_v4_check(th, skb->len,
					 ireq->loc_addr,
					 ireq->rmt_addr,
					 csum_partial((char *)th, skb->len,
						      skb->csum));

		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
					    ireq->rmt_addr,
					    ireq->opt);
		err = net_xmit_eval(err);
	}

out:
	dst_release(dst);
	return err;
}

/*
 *	IPv4 request_sock destructor.
 */
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->opt);
}

#ifdef CONFIG_SYN_COOKIES
static void syn_flood_warning(struct sk_buff *skb)
{
	static unsigned long warntime;

	if (time_after(jiffies, (warntime + HZ * 60))) {
		warntime = jiffies;
		printk(KERN_INFO
		       "possible SYN flooding on port %d. Sending cookies.\n",
		       ntohs(skb->h.th->dest));
	}
}
#endif

/*
 * Save and compile IPv4 options into the request_sock if needed.
 */
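/*
 * As I read it, ip_options_echo() below does the real work: it builds the
 * option block the reply direction needs (reversing a source route, for
 * example), so the request_sock ends up holding options already oriented
 * for the SYN-ACK rather than a verbatim copy of what was received.
 */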
static struct ip_options *tcp_v4_save_options(struct sock *sk,
					      struct sk_buff *skb)
{
	struct ip_options *opt = &(IPCB(skb)->opt);
	struct ip_options *dopt = NULL;

	if (opt && opt->optlen) {
		int opt_size = optlength(opt);
		dopt = kmalloc(opt_size, GFP_ATOMIC);
		if (dopt) {
			if (ip_options_echo(dopt, skb)) {
				kfree(dopt);
				dopt = NULL;
			}
		}
	}
	return dopt;
}

#ifdef CONFIG_TCP_MD5SIG
/*
 * RFC2385 MD5 checksumming requires a mapping of
 * IP address->MD5 Key.
 * We need to maintain these in the sk structure.
 */

/* Find the Key structure for an address. */
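/* Note that this is a plain linear scan of the per-socket key array; with
 * at most one key per peer address, entries4 is expected to stay small, so
 * nothing fancier than an array walk seems warranted.
 */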
static struct tcp_md5sig_key *
			tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int i;

	if (!tp->md5sig_info || !tp->md5sig_info->entries4)
		return NULL;
	for (i = 0; i < tp->md5sig_info->entries4; i++) {
		if (tp->md5sig_info->keys4[i].addr == addr)
			return (struct tcp_md5sig_key *)
						&tp->md5sig_info->keys4[i];
	}
	return NULL;
}

struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
					 struct sock *addr_sk)
{
	return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->daddr);
}

EXPORT_SYMBOL(tcp_v4_md5_lookup);

struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
					       struct request_sock *req)
{
	return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
}

/* This can be called on a newly created socket, from other files */
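/* A note on the storage strategy below (my summary, not the authors'):
 * the key array is grown one element at a time with kmalloc + memcpy
 * rather than kept as a list, trading realloc cost on rare key additions
 * for a compact array that the per-segment lookup can scan cheaply. Note
 * also that on every failure path the caller's newkey buffer is freed
 * here, so callers must not reuse it.
 */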
int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
		      u8 *newkey, u8 newkeylen)
{
	/* Add Key to the list */
	struct tcp4_md5sig_key *key;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp4_md5sig_key *keys;

	key = (struct tcp4_md5sig_key *)tcp_v4_md5_do_lookup(sk, addr);
	if (key) {
		/* Pre-existing entry - just update that one. */
		kfree(key->key);
		key->key = newkey;
		key->keylen = newkeylen;
	} else {
		if (!tp->md5sig_info) {
			tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
						  GFP_ATOMIC);
			if (!tp->md5sig_info) {
				kfree(newkey);
				return -ENOMEM;
			}
		}
		if (tcp_alloc_md5sig_pool() == NULL) {
			kfree(newkey);
			return -ENOMEM;
		}
		if (tp->md5sig_info->alloced4 == tp->md5sig_info->entries4) {
			keys = kmalloc((sizeof(struct tcp4_md5sig_key) *
					(tp->md5sig_info->entries4 + 1)),
				       GFP_ATOMIC);
			if (!keys) {
				kfree(newkey);
				tcp_free_md5sig_pool();
				return -ENOMEM;
			}

			if (tp->md5sig_info->entries4)
				memcpy(keys, tp->md5sig_info->keys4,
				       (sizeof(struct tcp4_md5sig_key) *
					tp->md5sig_info->entries4));

			/* Free old key list, and reference new one */
			if (tp->md5sig_info->keys4)
				kfree(tp->md5sig_info->keys4);
			tp->md5sig_info->keys4 = keys;
			tp->md5sig_info->alloced4++;
		}
		tp->md5sig_info->entries4++;
		tp->md5sig_info->keys4[tp->md5sig_info->entries4 - 1].addr = addr;
		tp->md5sig_info->keys4[tp->md5sig_info->entries4 - 1].key = newkey;
		tp->md5sig_info->keys4[tp->md5sig_info->entries4 - 1].keylen = newkeylen;
	}
	return 0;
}

EXPORT_SYMBOL(tcp_v4_md5_do_add);

static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
			       u8 *newkey, u8 newkeylen)
{
	return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->daddr,
				 newkey, newkeylen);
}

int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int i;

	for (i = 0; i < tp->md5sig_info->entries4; i++) {
		if (tp->md5sig_info->keys4[i].addr == addr) {
			/* Free the key */
			kfree(tp->md5sig_info->keys4[i].key);
			tp->md5sig_info->entries4--;

			if (tp->md5sig_info->entries4 == 0) {
				kfree(tp->md5sig_info->keys4);
				tp->md5sig_info->keys4 = NULL;
			} else if (tp->md5sig_info->entries4 != i) {
				/* Need to do some manipulation */
				memcpy(&tp->md5sig_info->keys4[i],
				       &tp->md5sig_info->keys4[i + 1],
				       (tp->md5sig_info->entries4 - i) *
				       sizeof(struct tcp4_md5sig_key));
			}
			tcp_free_md5sig_pool();
			return 0;
		}
	}
	return -ENOENT;
}

EXPORT_SYMBOL(tcp_v4_md5_do_del);

static void tcp_v4_clear_md5_list(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* Free each key, then the set of keys,
	 * the crypto element, and then decrement our
	 * hold on the last resort crypto.
	 */
	if (tp->md5sig_info->entries4) {
		int i;
		for (i = 0; i < tp->md5sig_info->entries4; i++)
			kfree(tp->md5sig_info->keys4[i].key);
		tp->md5sig_info->entries4 = 0;
		tcp_free_md5sig_pool();
	}
	if (tp->md5sig_info->keys4) {
		kfree(tp->md5sig_info->keys4);
		tp->md5sig_info->keys4 = NULL;
		tp->md5sig_info->alloced4 = 0;
	}
}

static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
				 int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
	u8 *newkey;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin->sin_family != AF_INET)
		return -EINVAL;

	if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
		if (!tcp_sk(sk)->md5sig_info)
			return -ENOENT;
		return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
	}

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	if (!tcp_sk(sk)->md5sig_info) {
		struct tcp_sock *tp = tcp_sk(sk);
		struct tcp_md5sig_info *p = kzalloc(sizeof(*p), GFP_KERNEL);

		if (!p)
			return -ENOMEM;	/* allocation failure, not a bad argument */

		tp->md5sig_info = p;
	}

	newkey = kmalloc(cmd.tcpm_keylen, GFP_KERNEL);
	if (!newkey)
		return -ENOMEM;
	memcpy(newkey, cmd.tcpm_key, cmd.tcpm_keylen);
	return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
				 newkey, cmd.tcpm_keylen);
}

static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
				   __be32 saddr, __be32 daddr,
				   struct tcphdr *th, int protocol,
				   int tcplen)
{
	struct scatterlist sg[4];
	__u16 data_len;
	int block = 0;
#ifdef CONFIG_TCP_MD5SIG_DEBUG
	int i;
#endif
	__u16 old_checksum;
	struct tcp_md5sig_pool *hp;
	struct tcp4_pseudohdr *bp;
	struct hash_desc *desc;
	int err;
	unsigned int nbytes = 0;

	/*
	 * Okay, so RFC2385 is turned on for this connection,
	 * so we need to generate the MD5 hash for the packet now.
	 */

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;

	bp = &hp->md5_blk.ip4;
	desc = &hp->md5_desc;

	/*
	 * 1. the TCP pseudo-header (in the order: source IP address,
	 * destination IP address, zero-padded protocol number, and
	 * segment length)
	 */
	bp->saddr = saddr;
	bp->daddr = daddr;
	bp->pad = 0;
	bp->protocol = protocol;
	bp->len = htons(tcplen);
	sg_set_buf(&sg[block++], bp, sizeof(*bp));
	nbytes += sizeof(*bp);

#ifdef CONFIG_TCP_MD5SIG_DEBUG
	printk("Calculating hash for: ");
	for (i = 0; i < sizeof(*bp); i++)
		printk("%02x ", (unsigned int)((unsigned char *)bp)[i]);
	printk(" ");
#endif

	/* 2. the TCP header, excluding options, and assuming a
	 * checksum of zero.
	 */
	old_checksum = th->check;
	th->check = 0;
	sg_set_buf(&sg[block++], th, sizeof(struct tcphdr));
	nbytes += sizeof(struct tcphdr);
#ifdef CONFIG_TCP_MD5SIG_DEBUG
	for (i = 0; i < sizeof(struct tcphdr); i++)
		printk(" %02x", (unsigned int)((unsigned char *)th)[i]);
#endif
	/* 3. the TCP segment data (if any) */
	data_len = tcplen - (th->doff << 2);
	if (data_len > 0) {
		unsigned char *data = (unsigned char *)th + (th->doff << 2);
		sg_set_buf(&sg[block++], data, data_len);
		nbytes += data_len;
	}

	/* 4. an independently-specified key or password, known to both
	 * TCPs and presumably connection-specific
	 */
	sg_set_buf(&sg[block++], key->key, key->keylen);
	nbytes += key->keylen;

#ifdef CONFIG_TCP_MD5SIG_DEBUG
	printk(" and password: ");
	for (i = 0; i < key->keylen; i++)
		printk("%02x ", (unsigned int)key->key[i]);
#endif

	/* Now store the Hash into the packet */
	err = crypto_hash_init(desc);
	if (err)
		goto clear_hash;
	err = crypto_hash_update(desc, sg, nbytes);
	if (err)
		goto clear_hash;
	err = crypto_hash_final(desc, md5_hash);
	if (err)
		goto clear_hash;

	/* Reset header, and free up the crypto */
	tcp_put_md5sig_pool();
	th->check = old_checksum;

out:
#ifdef CONFIG_TCP_MD5SIG_DEBUG
	printk(" result:");
	for (i = 0; i < 16; i++)
		printk(" %02x", (unsigned int)(((u8 *)md5_hash)[i]));
	printk("\n");
#endif
	return 0;
clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	goto out;
}

int tcp_v4_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
			 struct sock *sk,
			 struct dst_entry *dst,
			 struct request_sock *req,
			 struct tcphdr *th, int protocol,
			 int tcplen)
{
	__be32 saddr, daddr;

	if (sk) {
		saddr = inet_sk(sk)->saddr;
		daddr = inet_sk(sk)->daddr;
	} else {
		struct rtable *rt = (struct rtable *)dst;
		BUG_ON(!rt);
		saddr = rt->rt_src;
		daddr = rt->rt_dst;
	}
	return tcp_v4_do_calc_md5_hash(md5_hash, key,
				       saddr, daddr,
				       th, protocol, tcplen);
}

EXPORT_SYMBOL(tcp_v4_calc_md5_hash);

static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
{
	/*
	 * This gets called for each TCP segment that arrives
	 * so we want to be efficient.
	 * We have 3 drop cases:
	 * o No MD5 hash and one expected.
	 * o MD5 hash and we're not expecting one.
	 * o MD5 hash and it's wrong.
	 */
	__u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	struct iphdr *iph = skb->nh.iph;
	struct tcphdr *th = skb->h.th;
	int length = (th->doff << 2) - sizeof(struct tcphdr);
	int genhash;
	unsigned char *ptr;
	unsigned char newhash[16];

	hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);

	/*
	 * If the TCP option length is less than the TCP_MD5SIG
	 * option length, then we can shortcut
	 */
	if (length < TCPOLEN_MD5SIG) {
		if (hash_expected)
			return 1;
		else
			return 0;
	}

	/* Okay, we can't shortcut - we have to grub through the options */
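	/* Standard TCP option walk: each non-NOP option is kind, length,
	 * data, with length counting the kind and length octets themselves -
	 * hence the opsize < 2 sanity check and the ptr += opsize - 2
	 * advance below.
	 */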
	ptr = (unsigned char *)(th + 1);
	while (length > 0) {
		int opcode = *ptr++;
		int opsize;

		switch (opcode) {
		case TCPOPT_EOL:
			goto done_opts;
		case TCPOPT_NOP:
			length--;
			continue;
		default:
			opsize = *ptr++;
			if (opsize < 2)
				goto done_opts;
			if (opsize > length)
				goto done_opts;

			if (opcode == TCPOPT_MD5SIG) {
				hash_location = ptr;
				goto done_opts;
			}
		}
		ptr += opsize - 2;
		length -= opsize;
	}
done_opts:
	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return 0;

	if (hash_expected && !hash_location) {
		LIMIT_NETDEBUG(KERN_INFO "MD5 Hash expected but NOT found "
			       "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n",
			       NIPQUAD(iph->saddr), ntohs(th->source),
			       NIPQUAD(iph->daddr), ntohs(th->dest));
		return 1;
	}

	if (!hash_expected && hash_location) {
		LIMIT_NETDEBUG(KERN_INFO "MD5 Hash NOT expected but found "
			       "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n",
			       NIPQUAD(iph->saddr), ntohs(th->source),
			       NIPQUAD(iph->daddr), ntohs(th->dest));
		return 1;
	}

	/* Okay, so this is hash_expected and hash_location -
	 * so we need to calculate the checksum.
	 */
	genhash = tcp_v4_do_calc_md5_hash(newhash,
					  hash_expected,
					  iph->saddr, iph->daddr,
					  th, sk->sk_protocol,
					  skb->len);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		if (net_ratelimit()) {
			printk(KERN_INFO "MD5 Hash failed for "
			       "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)%s\n",
			       NIPQUAD(iph->saddr), ntohs(th->source),
			       NIPQUAD(iph->daddr), ntohs(th->dest),
			       genhash ? " tcp_v4_calc_md5_hash failed" : "");
#ifdef CONFIG_TCP_MD5SIG_DEBUG
			do {
				int i;
				printk("Received: ");
				for (i = 0; i < 16; i++)
					printk("%02x ",
					       0xff & (int)hash_location[i]);
				printk("\n");
				printk("Calculated: ");
				for (i = 0; i < 16; i++)
					printk("%02x ", 0xff & (int)newhash[i]);
				printk("\n");
			} while (0);
#endif
		}
		return 1;
	}
	return 0;
}

#endif

struct request_sock_ops tcp_request_sock_ops __read_mostly = {
	.family		= PF_INET,
	.obj_size	= sizeof(struct tcp_request_sock),
	.rtx_syn_ack	= tcp_v4_send_synack,
	.send_ack	= tcp_v4_reqsk_send_ack,
	.destructor	= tcp_v4_reqsk_destructor,
	.send_reset	= tcp_v4_send_reset,
};

struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
#ifdef CONFIG_TCP_MD5SIG
	.md5_lookup	= tcp_v4_reqsk_md5_lookup,
#endif
};

static struct timewait_sock_ops tcp_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor = tcp_twsk_destructor,
};

int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct inet_request_sock *ireq;
	struct tcp_options_received tmp_opt;
	struct request_sock *req;
	__be32 saddr = skb->nh.iph->saddr;
	__be32 daddr = skb->nh.iph->daddr;
	__u32 isn = TCP_SKB_CB(skb)->when;
	struct dst_entry *dst = NULL;
#ifdef CONFIG_SYN_COOKIES
	int want_cookie = 0;
#else
#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
#endif

	/* Never answer to SYNs sent to broadcast or multicast */
	if (((struct rtable *)skb->dst)->rt_flags &
	    (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	/* TW buckets are converted to open requests without
	 * limitations, they conserve resources and peer is
	 * evidently real one.
	 */
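	/* Roughly: when the SYN queue is full, syncookies (if enabled) take
	 * over - the connection state is encoded into the ISN returned in
	 * the SYN-ACK, and cookie_v4_check() later reconstructs the request
	 * from the returning ACK, so nothing has to be queued here at all.
	 */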
	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
#ifdef CONFIG_SYN_COOKIES
		if (sysctl_tcp_syncookies) {
			want_cookie = 1;
		} else
#endif
		goto drop;
	}

	/* Accept backlog is full. If we have already queued enough
	 * of warm entries in syn queue, drop request. It is better than
	 * clogging syn queue with openreqs with exponentially increasing
	 * timeout.
	 */
	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;

	req = reqsk_alloc(&tcp_request_sock_ops);
	if (!req)
		goto drop;

#ifdef CONFIG_TCP_MD5SIG
	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
#endif

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = 536;
	tmp_opt.user_mss = tcp_sk(sk)->rx_opt.user_mss;

	tcp_parse_options(skb, &tmp_opt, 0);

	if (want_cookie) {
		tcp_clear_options(&tmp_opt);
		tmp_opt.saw_tstamp = 0;
	}

	if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) {
		/* Some OSes (unknown ones, but I see them on web server, which
		 * contains information interesting only for windows'
		 * users) do not send their stamp in SYN. It is easy case.
		 * We simply do not advertise TS support.
		 */
		tmp_opt.saw_tstamp = 0;
		tmp_opt.tstamp_ok = 0;
	}
	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;

	tcp_openreq_init(req, &tmp_opt, skb);

	if (security_inet_conn_request(sk, skb, req))
		goto drop_and_free;

	ireq = inet_rsk(req);
	ireq->loc_addr = daddr;
	ireq->rmt_addr = saddr;
	ireq->opt = tcp_v4_save_options(sk, skb);
	if (!want_cookie)
		TCP_ECN_create_request(req, skb->h.th);

	if (want_cookie) {
#ifdef CONFIG_SYN_COOKIES
		syn_flood_warning(skb);
#endif
		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
	} else if (!isn) {
		struct inet_peer *peer = NULL;

		/* VJ's idea. We save last timestamp seen
		 * from the destination in peer table, when entering
		 * state TIME-WAIT, and check against it before
		 * accepting new connection request.
		 *
		 * If "isn" is not zero, this request hit alive
		 * timewait bucket, so that all the necessary checks
		 * are made in the function processing timewait state.
		 */
		if (tmp_opt.saw_tstamp &&
		    tcp_death_row.sysctl_tw_recycle &&
		    (dst = inet_csk_route_req(sk, req)) != NULL &&
		    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
		    peer->v4daddr == saddr) {
			if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
			    (s32)(peer->tcp_ts - req->ts_recent) >
							TCP_PAWS_WINDOW) {
				NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED);
				dst_release(dst);
				goto drop_and_free;
			}
		}
		/* Kill the following clause, if you dislike this way. */
		else if (!sysctl_tcp_syncookies &&
			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
			  (sysctl_max_syn_backlog >> 2)) &&
			 (!peer || !peer->tcp_ts_stamp) &&
			 (!dst || !dst_metric(dst, RTAX_RTT))) {
			/* Without syncookies last quarter of
			 * backlog is filled with destinations,
			 * proven to be alive.
			 * It means that we continue to communicate
			 * to destinations, already remembered
			 * to the moment of synflood.
			 */
			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open "
				       "request from %u.%u.%u.%u/%u\n",
				       NIPQUAD(saddr),
				       ntohs(skb->h.th->source));
			dst_release(dst);
			goto drop_and_free;
		}

		isn = tcp_v4_init_sequence(skb);
	}
	tcp_rsk(req)->snt_isn = isn;

	if (tcp_v4_send_synack(sk, req, dst))
		goto drop_and_free;

	if (want_cookie) {
		reqsk_free(req);
	} else {
		inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
	}
	return 0;

drop_and_free:
	reqsk_free(req);
drop:
	return 0;
}


/*
 * The three way handshake has completed - we got a valid synack -
 * now create the new socket.
 */
struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct dst_entry *dst)
{
	struct inet_request_sock *ireq;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
		goto exit;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit;

	newsk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(newsk, dst);

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	ireq = inet_rsk(req);
	newinet->daddr = ireq->rmt_addr;
	newinet->rcv_saddr = ireq->loc_addr;
	newinet->saddr = ireq->loc_addr;
	newinet->opt = ireq->opt;
	ireq->opt = NULL;
	newinet->mc_index = inet_iif(skb);
	newinet->mc_ttl = skb->nh.iph->ttl;
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (newinet->opt)
		inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
	newinet->id = newtp->write_seq ^ jiffies;

	tcp_mtup_init(newsk);
	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
	tcp_initialize_rcv_mss(newsk);

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	if ((key = tcp_v4_md5_do_lookup(sk, newinet->daddr)) != NULL) {
		/*
		 * We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		char *newkey = kmalloc(key->keylen, GFP_ATOMIC);
		if (newkey) {
			memcpy(newkey, key->key, key->keylen);
			tcp_v4_md5_do_add(newsk, inet_sk(sk)->daddr,
					  newkey, key->keylen);
		}
	}
#endif

	__inet_hash(&tcp_hashinfo, newsk, 0);
	__inet_inherit_port(&tcp_hashinfo, sk, newsk);

	return newsk;

exit_overflow:
	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
exit:
	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
	dst_release(dst);
	return NULL;
}

static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = skb->h.th;
	struct iphdr *iph = skb->nh.iph;
	struct sock *nsk;
	struct request_sock **prev;
	/* Find possible connection requests. */
	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
						       iph->saddr, iph->daddr);
	if (req)
		return tcp_check_req(sk, skb, req, prev);

	nsk = inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr,
				      th->source, skb->nh.iph->daddr,
				      th->dest, inet_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		inet_twsk_put(inet_twsk(nsk));
		return NULL;
	}

#ifdef CONFIG_SYN_COOKIES
	if (!th->rst && !th->syn && th->ack)
		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}

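/* A note on the heuristic below (my reading): with CHECKSUM_COMPLETE the
 * device has already summed the packet, so verification is nearly free and
 * done immediately. Otherwise only the pseudo-header sum is seeded, and for
 * anything larger than 76 bytes full verification is deferred until the
 * data is copied to the user, where the extra pass over the bytes is cheap.
 */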
static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
{
	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr,
				  skb->nh.iph->daddr, skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			return 0;
		}
	}

	skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr, skb->nh.iph->daddr,
				       skb->len, IPPROTO_TCP, 0);

	if (skb->len <= 76) {
		return __skb_checksum_complete(skb);
	}
	return 0;
}


/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct sock *rsk;
#ifdef CONFIG_TCP_MD5SIG
	/*
	 * We really want to reject the packet as early as possible
	 * if:
	 *  o We're expecting an MD5'd packet and this is no MD5 tcp option
	 *  o There is an MD5 option and we're not expecting one
	 */
	if (tcp_v4_inbound_md5_hash(sk, skb))
		goto discard;
#endif

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		TCP_CHECK_TIMER(sk);
		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len)) {
			rsk = sk;
			goto reset;
		}
		TCP_CHECK_TIMER(sk);
		return 0;
	}

	if (skb->len < (skb->h.th->doff << 2) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb)) {
				rsk = nsk;
				goto reset;
			}
			return 0;
		}
	}

	TCP_CHECK_TIMER(sk);
	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len)) {
		rsk = sk;
		goto reset;
	}
	TCP_CHECK_TIMER(sk);
	return 0;

reset:
	tcp_v4_send_reset(rsk, skb);
discard:
	kfree_skb(skb);
	/* Be careful here. If this function gets more complicated and
	 * gcc suffers from register pressure on the x86, sk (in %ebx)
	 * might be destroyed here. This current version compiles correctly,
	 * but you have been warned.
	 */
	return 0;

csum_err:
	TCP_INC_STATS_BH(TCP_MIB_INERRS);
	goto discard;
}

/*
 *	From tcp_input.c
 */

int tcp_v4_rcv(struct sk_buff *skb)
{
	struct tcphdr *th;
	struct sock *sk;
	int ret;

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/* Count it even if it's bad */
	TCP_INC_STATS_BH(TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = skb->h.th;

	if (th->doff < sizeof(struct tcphdr) / 4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff * 4))
		goto discard_it;

	/* An explanation is required here, I think.
	 * Packet length and doff are validated by header prediction,
	 * provided case of th->doff==0 is eliminated.
	 * So, we defer the checks. */
	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
	     tcp_v4_checksum_init(skb)))
		goto bad_packet;

	th = skb->h.th;
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when = 0;
	TCP_SKB_CB(skb)->flags = skb->nh.iph->tos;
	TCP_SKB_CB(skb)->sacked = 0;

	sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source,
			   skb->nh.iph->daddr, th->dest,
			   inet_iif(skb));

	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;
	nf_reset(skb);

	if (sk_filter(sk, skb))
		goto discard_and_relse;

	skb->dev = NULL;

	bh_lock_sock_nested(sk);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
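		/* Three delivery paths from here, sketched: with a NET_DMA
		 * channel the receive runs now and copies are offloaded to
		 * the DMA engine; otherwise the segment is prequeued for
		 * process context when a reader is waiting, or processed
		 * immediately. If the user owns the socket it goes to the
		 * backlog instead (the else branch below).
		 */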
#ifdef CONFIG_NET_DMA
		struct tcp_sock *tp = tcp_sk(sk);
		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
			tp->ucopy.dma_chan = get_softnet_dma();
		if (tp->ucopy.dma_chan)
			ret = tcp_v4_do_rcv(sk, skb);
		else
#endif
		{
			if (!tcp_prequeue(sk, skb))
				ret = tcp_v4_do_rcv(sk, skb);
		}
	} else
		sk_add_backlog(sk, skb);
	bh_unlock_sock(sk);

	sock_put(sk);

	return ret;

no_tcp_socket:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
bad_packet:
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
	} else {
		tcp_v4_send_reset(NULL, skb);
	}

discard_it:
	/* Discard frame. */
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}
	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN: {
		struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo,
							skb->nh.iph->daddr,
							th->dest,
							inet_iif(skb));
		if (sk2) {
			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
			inet_twsk_put(inet_twsk(sk));
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v4_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}

/* VJ's idea. Save last timestamp seen from this destination
 * and hold it at least for normal timewait interval to use for duplicate
 * segment detection in subsequent connections, before they enter synchronized
 * state.
 */

int tcp_v4_remember_stamp(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
	struct inet_peer *peer = NULL;
	int release_it = 0;

	if (!rt || rt->rt_dst != inet->daddr) {
		peer = inet_getpeer(inet->daddr, 1);
		release_it = 1;
	} else {
		if (!rt->peer)
			rt_bind_peer(rt, 1);
		peer = rt->peer;
	}

	if (peer) {
		if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
		     peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) {
			peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp;
			peer->tcp_ts = tp->rx_opt.ts_recent;
		}
		if (release_it)
			inet_putpeer(peer);
		return 1;
	}

	return 0;
}

int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
{
	struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);

	if (peer) {
		const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);

		if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
		     peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) {
			peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp;
			peer->tcp_ts = tcptw->tw_ts_recent;
		}
		inet_putpeer(peer);
		return 1;
	}

	return 0;
}

struct inet_connection_sock_af_ops ipv4_specific = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.conn_request	   = tcp_v4_conn_request,
	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
	.remember_stamp	   = tcp_v4_remember_stamp,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ip_setsockopt,
	.getsockopt	   = ip_getsockopt,
	.addr2sockaddr	   = inet_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in),
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ip_setsockopt,
	.compat_getsockopt = compat_ip_getsockopt,
#endif
};

struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
#ifdef CONFIG_TCP_MD5SIG
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_calc_md5_hash,
	.md5_add	= tcp_v4_md5_add_func,
	.md5_parse	= tcp_v4_parse_md5_keys,
#endif
};

/* NOTE: A lot of things set to zero explicitly by call to
 *	 sk_alloc() so need not be done here.
 */
static int tcp_v4_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	skb_queue_head_init(&tp->out_of_order_queue);
	tcp_init_xmit_timers(sk);
	tcp_prequeue_init(tp);

	icsk->icsk_rto = TCP_TIMEOUT_INIT;
	tp->mdev = TCP_TIMEOUT_INIT;

	/* So many TCP implementations out there (incorrectly) count the
	 * initial SYN frame in their delayed-ACK and congestion control
	 * algorithms that we must have the following bandaid to talk
	 * efficiently to them.  -DaveM
	 */
	tp->snd_cwnd = 2;

	/* See draft-stevens-tcpca-spec-01 for discussion of the
	 * initialization of these values.
	 */
	tp->snd_ssthresh = 0x7fffffff;	/* Infinity */
	tp->snd_cwnd_clamp = ~0;
	tp->mss_cache = 536;

	tp->reordering = sysctl_tcp_reordering;
	icsk->icsk_ca_ops = &tcp_init_congestion_ops;

	sk->sk_state = TCP_CLOSE;

	sk->sk_write_space = sk_stream_write_space;
	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

	icsk->icsk_af_ops = &ipv4_specific;
	icsk->icsk_sync_mss = tcp_sync_mss;
#ifdef CONFIG_TCP_MD5SIG
	tp->af_specific = &tcp_sock_ipv4_specific;
#endif

	sk->sk_sndbuf = sysctl_tcp_wmem[1];
	sk->sk_rcvbuf = sysctl_tcp_rmem[1];

	atomic_inc(&tcp_sockets_allocated);

	return 0;
}

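/* Editorial note (not part of the original file): mss_cache starts at 536,
 * the classic IPv4 default MSS (the 576-byte minimum reassembly datagram
 * minus 40 bytes of IP + TCP headers), and snd_ssthresh = 0x7fffffff
 * effectively means "slow start until the first loss or receiver limit".
 */
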
int tcp_v4_destroy_sock(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	tcp_clear_xmit_timers(sk);

	tcp_cleanup_congestion_control(sk);

	/* Clean up the write buffer. */
	sk_stream_writequeue_purge(sk);

	/* Cleans up our, hopefully empty, out_of_order_queue. */
	__skb_queue_purge(&tp->out_of_order_queue);

#ifdef CONFIG_TCP_MD5SIG
	/* Clean up the MD5 key list, if any */
	if (tp->md5sig_info) {
		tcp_v4_clear_md5_list(sk);
		kfree(tp->md5sig_info);
		tp->md5sig_info = NULL;
	}
#endif

#ifdef CONFIG_NET_DMA
	/* Cleans up our sk_async_wait_queue */
	__skb_queue_purge(&sk->sk_async_wait_queue);
#endif

	/* Clean up the prequeue; it really should be empty. */
	__skb_queue_purge(&tp->ucopy.prequeue);

	/* Clean up a referenced TCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash)
		inet_put_port(&tcp_hashinfo, sk);

	/* If a sendmsg cached page exists, toss it. */
	if (sk->sk_sndmsg_page) {
		__free_page(sk->sk_sndmsg_page);
		sk->sk_sndmsg_page = NULL;
	}

	atomic_dec(&tcp_sockets_allocated);

	return 0;
}

EXPORT_SYMBOL(tcp_v4_destroy_sock);

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCP sock list dumping. */

static inline struct inet_timewait_sock *tw_head(struct hlist_head *head)
{
	return hlist_empty(head) ? NULL :
	       list_entry(head->first, struct inet_timewait_sock, tw_node);
}

static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
{
	return tw->tw_node.next ?
	       hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
}

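/* Editorial note (not part of the original file): the seq_file walkers below
 * iterate four populations in order, tracked by st->state: LISTENING sockets,
 * then each listener's pending open requests (TCP_SEQ_STATE_OPENREQ), then
 * ESTABLISHED sockets, then TIME_WAIT buckets. st->num is the running row
 * index printed as the "sl" column.
 */
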
static void *listening_get_next(struct seq_file *seq, void *cur)
{
	struct inet_connection_sock *icsk;
	struct hlist_node *node;
	struct sock *sk = cur;
	struct tcp_iter_state *st = seq->private;

	if (!sk) {
		st->bucket = 0;
		sk = sk_head(&tcp_hashinfo.listening_hash[0]);
		goto get_sk;
	}

	++st->num;

	if (st->state == TCP_SEQ_STATE_OPENREQ) {
		struct request_sock *req = cur;

		icsk = inet_csk(st->syn_wait_sk);
		req = req->dl_next;
		while (1) {
			while (req) {
				if (req->rsk_ops->family == st->family) {
					cur = req;
					goto out;
				}
				req = req->dl_next;
			}
			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
				break;
get_req:
			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
		}
		sk = sk_next(st->syn_wait_sk);
		st->state = TCP_SEQ_STATE_LISTENING;
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
	} else {
		icsk = inet_csk(sk);
		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		if (reqsk_queue_len(&icsk->icsk_accept_queue))
			goto start_req;
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		sk = sk_next(sk);
	}
get_sk:
	sk_for_each_from(sk, node) {
		if (sk->sk_family == st->family) {
			cur = sk;
			goto out;
		}
		icsk = inet_csk(sk);
		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
start_req:
			st->uid = sock_i_uid(sk);
			st->syn_wait_sk = sk;
			st->state = TCP_SEQ_STATE_OPENREQ;
			st->sbucket = 0;
			goto get_req;
		}
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
	}
	if (++st->bucket < INET_LHTABLE_SIZE) {
		sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]);
		goto get_sk;
	}
	cur = NULL;
out:
	return cur;
}

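/* Editorial note (not part of the original file): while in OPENREQ state the
 * iterator returns to user context still holding the listener's
 * syn_wait_lock for reading; it is released either above, once that SYN
 * table is exhausted, or in tcp_seq_stop() if the dump ends early.
 */
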
static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
{
	void *rc = listening_get_next(seq, NULL);

	while (rc && *pos) {
		rc = listening_get_next(seq, rc);
		--*pos;
	}
	return rc;
}

static void *established_get_first(struct seq_file *seq)
{
	struct tcp_iter_state *st = seq->private;
	void *rc = NULL;

	for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
		struct sock *sk;
		struct hlist_node *node;
		struct inet_timewait_sock *tw;

		/* We can reschedule _before_ having picked the target: */
		cond_resched_softirq();

		read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
		sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
			if (sk->sk_family != st->family)
				continue;
			rc = sk;
			goto out;
		}
		st->state = TCP_SEQ_STATE_TIME_WAIT;
		inet_twsk_for_each(tw, node,
				   &tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain) {
			if (tw->tw_family != st->family)
				continue;
			rc = tw;
			goto out;
		}
		read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
		st->state = TCP_SEQ_STATE_ESTABLISHED;
	}
out:
	return rc;
}

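/* Editorial note (not part of the original file): the established hash is
 * split into two halves of ehash_size buckets each; bucket i holds
 * established sockets and bucket i + ehash_size their TIME_WAIT siblings,
 * which is why the timewait walk above offsets the index by ehash_size while
 * reusing the same bucket lock.
 */
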
static void *established_get_next(struct seq_file *seq, void *cur)
{
	struct sock *sk = cur;
	struct inet_timewait_sock *tw;
	struct hlist_node *node;
	struct tcp_iter_state *st = seq->private;

	++st->num;

	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
		tw = cur;
		tw = tw_next(tw);
get_tw:
		while (tw && tw->tw_family != st->family)
			tw = tw_next(tw);
		if (tw) {
			cur = tw;
			goto out;
		}
		read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
		st->state = TCP_SEQ_STATE_ESTABLISHED;

		/* We can reschedule between buckets: */
		cond_resched_softirq();

		if (++st->bucket < tcp_hashinfo.ehash_size) {
			read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
			sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
		} else {
			cur = NULL;
			goto out;
		}
	} else
		sk = sk_next(sk);

	sk_for_each_from(sk, node) {
		if (sk->sk_family == st->family)
			goto found;
	}

	st->state = TCP_SEQ_STATE_TIME_WAIT;
	tw = tw_head(&tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain);
	goto get_tw;
found:
	cur = sk;
out:
	return cur;
}

static void *established_get_idx(struct seq_file *seq, loff_t pos)
{
	void *rc = established_get_first(seq);

	while (rc && pos) {
		rc = established_get_next(seq, rc);
		--pos;
	}
	return rc;
}

static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
{
	void *rc;
	struct tcp_iter_state *st = seq->private;

	inet_listen_lock(&tcp_hashinfo);
	st->state = TCP_SEQ_STATE_LISTENING;
	rc = listening_get_idx(seq, &pos);

	if (!rc) {
		inet_listen_unlock(&tcp_hashinfo);
		local_bh_disable();
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		rc = established_get_idx(seq, pos);
	}

	return rc;
}

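/* Editorial note (not part of the original file): note the locking handoff
 * here: the listening walk runs under inet_listen_lock(), while the
 * established/timewait walk runs with BHs disabled plus a per-bucket read
 * lock. Crossing from one phase to the other must leave one regime and enter
 * the other, as above and again in tcp_seq_next() and tcp_seq_stop().
 */
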
static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	st->state = TCP_SEQ_STATE_LISTENING;
	st->num = 0;
	return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}

static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	void *rc = NULL;
	struct tcp_iter_state *st;

	if (v == SEQ_START_TOKEN) {
		rc = tcp_get_idx(seq, 0);
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
	case TCP_SEQ_STATE_LISTENING:
		rc = listening_get_next(seq, v);
		if (!rc) {
			inet_listen_unlock(&tcp_hashinfo);
			local_bh_disable();
			st->state = TCP_SEQ_STATE_ESTABLISHED;
			rc = established_get_first(seq);
		}
		break;
	case TCP_SEQ_STATE_ESTABLISHED:
	case TCP_SEQ_STATE_TIME_WAIT:
		rc = established_get_next(seq, v);
		break;
	}
out:
	++*pos;
	return rc;
}

static void tcp_seq_stop(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
		if (v) {
			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		}
		/* Fall through: the listen lock is held in this state too. */
	case TCP_SEQ_STATE_LISTENING:
		if (v != SEQ_START_TOKEN)
			inet_listen_unlock(&tcp_hashinfo);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
	case TCP_SEQ_STATE_ESTABLISHED:
		if (v)
			read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
		local_bh_enable();
		break;
	}
}

static int tcp_seq_open(struct inode *inode, struct file *file)
{
	struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
	struct seq_file *seq;
	struct tcp_iter_state *s;
	int rc;

	if (unlikely(afinfo == NULL))
		return -EINVAL;

	s = kzalloc(sizeof(*s), GFP_KERNEL);
	if (!s)
		return -ENOMEM;
	s->family	 = afinfo->family;
	s->seq_ops.start = tcp_seq_start;
	s->seq_ops.next	 = tcp_seq_next;
	s->seq_ops.show	 = afinfo->seq_show;
	s->seq_ops.stop	 = tcp_seq_stop;

	rc = seq_open(file, &s->seq_ops);
	if (rc)
		goto out_kfree;
	seq	     = file->private_data;
	seq->private = s;
out:
	return rc;
out_kfree:
	kfree(s);
	goto out;
}

int tcp_proc_register(struct tcp_seq_afinfo *afinfo)
{
	int rc = 0;
	struct proc_dir_entry *p;

	if (!afinfo)
		return -EINVAL;
	afinfo->seq_fops->owner	  = afinfo->owner;
	afinfo->seq_fops->open	  = tcp_seq_open;
	afinfo->seq_fops->read	  = seq_read;
	afinfo->seq_fops->llseek  = seq_lseek;
	afinfo->seq_fops->release = seq_release_private;

	p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
	if (p)
		p->data = afinfo;
	else
		rc = -ENOMEM;
	return rc;
}

void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo)
{
	if (!afinfo)
		return;
	proc_net_remove(afinfo->name);
	memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
}

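/* Editorial note (not part of the original file): the register/unregister
 * pair is deliberately address-family agnostic; the IPv6 side can reuse the
 * same iterator machinery by registering its own tcp_seq_afinfo with family
 * AF_INET6 and its own seq_show callback.
 */
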
static void get_openreq4(struct sock *sk, struct request_sock *req,
			 char *tmpbuf, int i, int uid)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	int ttd = req->expires - jiffies;

	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p",
		i,
		ireq->loc_addr,
		ntohs(inet_sk(sk)->sport),
		ireq->rmt_addr,
		ntohs(ireq->rmt_port),
		TCP_SYN_RECV,
		0, 0, /* could print option size, but that is af dependent. */
		1,    /* timers active (only the expire timer) */
		jiffies_to_clock_t(ttd),
		req->retrans,
		uid,
		0,  /* non standard timer */
		0,  /* open_requests have no inode */
		atomic_read(&sk->sk_refcnt),
		req);
}

static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
{
	int timer_active;
	unsigned long timer_expires;
	struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	struct inet_sock *inet = inet_sk(sp);
	__be32 dest = inet->daddr;
	__be32 src = inet->rcv_saddr;
	__u16 destp = ntohs(inet->dport);
	__u16 srcp = ntohs(inet->sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires	= jiffies;
	}

	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
			"%08X %5d %8d %lu %d %p %u %u %u %u %d",
		i, src, srcp, dest, destp, sp->sk_state,
		tp->write_seq - tp->snd_una,
		sp->sk_state == TCP_LISTEN ? sp->sk_ack_backlog :
					     (tp->rcv_nxt - tp->copied_seq),
		timer_active,
		jiffies_to_clock_t(timer_expires - jiffies),
		icsk->icsk_retransmits,
		sock_i_uid(sp),
		icsk->icsk_probes_out,
		sock_i_ino(sp),
		atomic_read(&sp->sk_refcnt), sp,
		icsk->icsk_rto,
		icsk->icsk_ack.ato,
		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
		tp->snd_cwnd,
		tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh);
}

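/* Editorial note (not part of the original file): the timer_active codes
 * emitted in the "tr" column follow the encoding visible above: 1 for the
 * retransmit timer, 2 for the keepalive-style sk_timer, 4 for the
 * zero-window probe timer, and 0 when no timer is pending.
 */
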
static void get_timewait4_sock(struct inet_timewait_sock *tw,
			       char *tmpbuf, int i)
{
	__be32 dest, src;
	__u16 destp, srcp;
	int ttd = tw->tw_ttd - jiffies;

	if (ttd < 0)
		ttd = 0;

	dest  = tw->tw_daddr;
	src   = tw->tw_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p",
		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
		3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
		atomic_read(&tw->tw_refcnt), tw);
}

#define TMPSZ 150

static int tcp4_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	char tmpbuf[TMPSZ + 1];

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "%-*s\n", TMPSZ - 1,
			   "  sl  local_address rem_address   st tx_queue "
			   "rx_queue tr tm->when retrnsmt   uid  timeout "
			   "inode");
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		get_tcp4_sock(v, tmpbuf, st->num);
		break;
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq4(st->syn_wait_sk, v, tmpbuf, st->num, st->uid);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
		get_timewait4_sock(v, tmpbuf, st->num);
		break;
	}
	seq_printf(seq, "%-*s\n", TMPSZ - 1, tmpbuf);
out:
	return 0;
}

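/* Illustrative only (values invented for this note, not from a real dump):
 * a listener bound to 127.0.0.1:8080 would render roughly as
 *    0: 0100007F:1F90 00000000:0000 0A 00000000:00000000 00:00000000 00000000  1000 0 12345 ...
 * where 0A is TCP_LISTEN, ports are host-order hex via ntohs(), and
 * addresses are raw %08X of the stored __be32 (hence 0100007F for 127.0.0.1
 * on a little-endian machine).
 */
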
static struct file_operations tcp4_seq_fops;
static struct tcp_seq_afinfo tcp4_seq_afinfo = {
	.owner		= THIS_MODULE,
	.name		= "tcp",
	.family		= AF_INET,
	.seq_show	= tcp4_seq_show,
	.seq_fops	= &tcp4_seq_fops,
};

int __init tcp4_proc_init(void)
{
	return tcp_proc_register(&tcp4_seq_afinfo);
}

void tcp4_proc_exit(void)
{
	tcp_proc_unregister(&tcp4_seq_afinfo);
}
#endif /* CONFIG_PROC_FS */

struct proto tcp_prot = {
	.name			= "TCP",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v4_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v4_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.sendmsg		= tcp_sendmsg,
	.recvmsg		= tcp_recvmsg,
	.backlog_rcv		= tcp_v4_do_rcv,
	.hash			= tcp_v4_hash,
	.unhash			= tcp_unhash,
	.get_port		= tcp_v4_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.orphan_count		= &tcp_orphan_count,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp_sock),
	.twsk_prot		= &tcp_timewait_sock_ops,
	.rsk_prot		= &tcp_request_sock_ops,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
};

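/* Editorial note (not part of the original file): tcp_prot is the proto
 * table the inet layer dispatches to for SOCK_STREAM/IPPROTO_TCP, so a
 * userspace socket(AF_INET, SOCK_STREAM, 0) ultimately lands on the tcp_*
 * handlers wired up here.
 */
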
void __init tcp_v4_init(struct net_proto_family *ops)
{
	if (inet_csk_ctl_sock_create(&tcp_socket, PF_INET, SOCK_RAW,
				     IPPROTO_TCP) < 0)
		panic("Failed to create the TCP control socket.\n");
}

EXPORT_SYMBOL(ipv4_specific);
EXPORT_SYMBOL(tcp_hashinfo);
EXPORT_SYMBOL(tcp_prot);
EXPORT_SYMBOL(tcp_unhash);
EXPORT_SYMBOL(tcp_v4_conn_request);
EXPORT_SYMBOL(tcp_v4_connect);
EXPORT_SYMBOL(tcp_v4_do_rcv);
EXPORT_SYMBOL(tcp_v4_remember_stamp);
EXPORT_SYMBOL(tcp_v4_send_check);
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);

#ifdef CONFIG_PROC_FS
EXPORT_SYMBOL(tcp_proc_register);
EXPORT_SYMBOL(tcp_proc_unregister);
#endif
EXPORT_SYMBOL(sysctl_local_port_range);
EXPORT_SYMBOL(sysctl_tcp_low_latency);