net-next-2.6.git: net/ipv4/tcp_ipv4.c
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Implementation of the Transmission Control Protocol(TCP).
7 *
8 * Version: $Id: tcp_ipv4.c,v 1.240 2002/02/01 22:01:04 davem Exp $
9 *
10 * IPv4 specific functions
11 *
12 *
13 * code split from:
14 * linux/ipv4/tcp.c
15 * linux/ipv4/tcp_input.c
16 * linux/ipv4/tcp_output.c
17 *
18 * See tcp.c for author information
19 *
20 * This program is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU General Public License
22 * as published by the Free Software Foundation; either version
23 * 2 of the License, or (at your option) any later version.
24 */
25
26/*
27 * Changes:
28 * David S. Miller : New socket lookup architecture.
29 * This code is dedicated to John Dyson.
30 * David S. Miller : Change semantics of established hash,
31 * half is devoted to TIME_WAIT sockets
32 * and the rest go in the other half.
33 * Andi Kleen : Add support for syncookies and fixed
34 * some bugs: ip options weren't passed to
35 * the TCP layer, missed a check for an
36 * ACK bit.
37 * Andi Kleen : Implemented fast path mtu discovery.
38 * Fixed many serious bugs in the
39 * request_sock handling and moved
40 * most of it into the af independent code.
41 * Added tail drop and some other bugfixes.
42 * Added new listen semantics.
43 * Mike McLagan : Routing by source
44 * Juan Jose Ciarlante: ip_dynaddr bits
45 * Andi Kleen: various fixes.
46 * Vitaly E. Lavrov : Transparent proxy revived after year
47 * coma.
48 * Andi Kleen : Fix new listen.
49 * Andi Kleen : Fix accept error reporting.
50 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
51 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
52 * a single port at the same time.
53 */
54
55#include <linux/config.h>
56
57#include <linux/types.h>
58#include <linux/fcntl.h>
59#include <linux/module.h>
60#include <linux/random.h>
61#include <linux/cache.h>
62#include <linux/jhash.h>
63#include <linux/init.h>
64#include <linux/times.h>
65
66#include <net/icmp.h>
67#include <net/tcp.h>
68#include <net/ipv6.h>
69#include <net/inet_common.h>
70#include <net/xfrm.h>
71
72#include <linux/inet.h>
73#include <linux/ipv6.h>
74#include <linux/stddef.h>
75#include <linux/proc_fs.h>
76#include <linux/seq_file.h>
77
78extern int sysctl_ip_dynaddr;
79int sysctl_tcp_tw_reuse;
80int sysctl_tcp_low_latency;
81
82/* Check TCP sequence numbers in ICMP packets. */
83#define ICMP_MIN_LENGTH 8
84
85/* Socket used for sending RSTs */
86static struct socket *tcp_socket;
87
88void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len,
89 struct sk_buff *skb);
90
91struct tcp_hashinfo __cacheline_aligned tcp_hashinfo = {
92 .__tcp_lhash_lock = RW_LOCK_UNLOCKED,
93 .__tcp_lhash_users = ATOMIC_INIT(0),
94 .__tcp_lhash_wait
95 = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.__tcp_lhash_wait),
96 .__tcp_portalloc_lock = SPIN_LOCK_UNLOCKED
97};
98
99/*
100 * This array holds the first and last local port number.
101 * For high-usage systems, use sysctl to change this to
102 * 32768-61000
103 */
104int sysctl_local_port_range[2] = { 1024, 4999 };
105int tcp_port_rover = 1024 - 1;
106
107static __inline__ int tcp_hashfn(__u32 laddr, __u16 lport,
108 __u32 faddr, __u16 fport)
109{
110 int h = (laddr ^ lport) ^ (faddr ^ fport);
111 h ^= h >> 16;
112 h ^= h >> 8;
113 return h & (tcp_ehash_size - 1);
114}
115
116static __inline__ int tcp_sk_hashfn(struct sock *sk)
117{
118 struct inet_sock *inet = inet_sk(sk);
119 __u32 laddr = inet->rcv_saddr;
120 __u16 lport = inet->num;
121 __u32 faddr = inet->daddr;
122 __u16 fport = inet->dport;
123
124 return tcp_hashfn(laddr, lport, faddr, fport);
125}
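/* Editorial sketch (not part of the original file): tcp_hashfn() above just
 * XOR-folds the connection 4-tuple down to an index into tcp_ehash. Below is
 * a self-contained userspace illustration of the same fold; the name
 * demo_ehash_index and the ehash_size parameter are hypothetical stand-ins,
 * and the final mask assumes the table size is a power of two.
 */
static unsigned int demo_ehash_index(unsigned int laddr, unsigned short lport,
				     unsigned int faddr, unsigned short fport,
				     unsigned int ehash_size)
{
	unsigned int h = (laddr ^ lport) ^ (faddr ^ fport);

	h ^= h >> 16;			/* fold the upper half in */
	h ^= h >> 8;			/* mix again so every byte matters */
	return h & (ehash_size - 1);	/* mask works only for a 2^n size */
}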
126
127/* Allocate and initialize a new TCP local port bind bucket.
128 * The bindhash mutex for snum's hash chain must be held here.
129 */
130struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head,
131 unsigned short snum)
132{
133 struct tcp_bind_bucket *tb = kmem_cache_alloc(tcp_bucket_cachep,
134 SLAB_ATOMIC);
135 if (tb) {
136 tb->port = snum;
137 tb->fastreuse = 0;
138 INIT_HLIST_HEAD(&tb->owners);
139 hlist_add_head(&tb->node, &head->chain);
140 }
141 return tb;
142}
143
144/* Caller must hold hashbucket lock for this tb with local BH disabled */
145void tcp_bucket_destroy(struct tcp_bind_bucket *tb)
146{
147 if (hlist_empty(&tb->owners)) {
148 __hlist_del(&tb->node);
149 kmem_cache_free(tcp_bucket_cachep, tb);
150 }
151}
152
153/* Caller must disable local BH processing. */
154static __inline__ void __tcp_inherit_port(struct sock *sk, struct sock *child)
155{
156 struct tcp_bind_hashbucket *head =
157 &tcp_bhash[tcp_bhashfn(inet_sk(child)->num)];
158 struct tcp_bind_bucket *tb;
159
160 spin_lock(&head->lock);
161 tb = tcp_sk(sk)->bind_hash;
162 sk_add_bind_node(child, &tb->owners);
163 tcp_sk(child)->bind_hash = tb;
164 spin_unlock(&head->lock);
165}
166
167inline void tcp_inherit_port(struct sock *sk, struct sock *child)
168{
169 local_bh_disable();
170 __tcp_inherit_port(sk, child);
171 local_bh_enable();
172}
173
174void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb,
175 unsigned short snum)
176{
177 inet_sk(sk)->num = snum;
178 sk_add_bind_node(sk, &tb->owners);
179 tcp_sk(sk)->bind_hash = tb;
180}
181
182static inline int tcp_bind_conflict(struct sock *sk, struct tcp_bind_bucket *tb)
183{
184 const u32 sk_rcv_saddr = tcp_v4_rcv_saddr(sk);
185 struct sock *sk2;
186 struct hlist_node *node;
187 int reuse = sk->sk_reuse;
188
189 sk_for_each_bound(sk2, node, &tb->owners) {
190 if (sk != sk2 &&
191 !tcp_v6_ipv6only(sk2) &&
192 (!sk->sk_bound_dev_if ||
193 !sk2->sk_bound_dev_if ||
194 sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
195 if (!reuse || !sk2->sk_reuse ||
196 sk2->sk_state == TCP_LISTEN) {
197 const u32 sk2_rcv_saddr = tcp_v4_rcv_saddr(sk2);
198 if (!sk2_rcv_saddr || !sk_rcv_saddr ||
199 sk2_rcv_saddr == sk_rcv_saddr)
200 break;
201 }
202 }
203 }
204 return node != NULL;
205}
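/* Editorial sketch (not part of the original file): for one pair of sockets
 * that share a port and are visible to each other (same or unbound device,
 * and not an IPv6-only peer), tcp_bind_conflict() above reduces to the
 * predicate below. Plain C, names hypothetical, illustrative only.
 */
static int demo_pair_conflicts(int both_reuse, int old_is_listening,
			       unsigned int new_rcv_saddr,
			       unsigned int old_rcv_saddr)
{
	/* SO_REUSEADDR on both sockets permits sharing, unless the already
	 * bound socket is in LISTEN state. */
	if (both_reuse && !old_is_listening)
		return 0;
	/* Otherwise a wildcard address on either side, or an exact address
	 * match, makes the bind conflict. */
	return !new_rcv_saddr || !old_rcv_saddr ||
	       new_rcv_saddr == old_rcv_saddr;
}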
206
207/* Obtain a reference to a local port for the given sock,
208 * if snum is zero it means select any available local port.
209 */
210static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
211{
212 struct tcp_bind_hashbucket *head;
213 struct hlist_node *node;
214 struct tcp_bind_bucket *tb;
215 int ret;
216
217 local_bh_disable();
218 if (!snum) {
219 int low = sysctl_local_port_range[0];
220 int high = sysctl_local_port_range[1];
221 int remaining = (high - low) + 1;
222 int rover;
223
224 spin_lock(&tcp_portalloc_lock);
225 if (tcp_port_rover < low)
226 rover = low;
227 else
228 rover = tcp_port_rover;
229 do {
230 rover++;
231 if (rover > high)
232 rover = low;
233 head = &tcp_bhash[tcp_bhashfn(rover)];
234 spin_lock(&head->lock);
235 tb_for_each(tb, node, &head->chain)
236 if (tb->port == rover)
237 goto next;
238 break;
239 next:
240 spin_unlock(&head->lock);
241 } while (--remaining > 0);
242 tcp_port_rover = rover;
243 spin_unlock(&tcp_portalloc_lock);
244
245 /* Exhausted local port range during search? It is not
246 * possible for us to be holding one of the bind hash
247 * locks if this test triggers, because if 'remaining'
248 * drops to zero, we broke out of the do/while loop at
249 * the top level, not from the 'break;' statement.
250 */
251 ret = 1;
252 if (unlikely(remaining <= 0))
253 goto fail;
254
255 /* OK, here is the one we will use. HEAD is
256 * non-NULL and we hold its mutex.
257 */
258 snum = rover;
259 } else {
260 head = &tcp_bhash[tcp_bhashfn(snum)];
261 spin_lock(&head->lock);
262 tb_for_each(tb, node, &head->chain)
263 if (tb->port == snum)
264 goto tb_found;
265 }
266 tb = NULL;
267 goto tb_not_found;
268tb_found:
269 if (!hlist_empty(&tb->owners)) {
270 if (sk->sk_reuse > 1)
271 goto success;
272 if (tb->fastreuse > 0 &&
273 sk->sk_reuse && sk->sk_state != TCP_LISTEN) {
274 goto success;
275 } else {
276 ret = 1;
277 if (tcp_bind_conflict(sk, tb))
278 goto fail_unlock;
279 }
280 }
281tb_not_found:
282 ret = 1;
283 if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL)
284 goto fail_unlock;
285 if (hlist_empty(&tb->owners)) {
286 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
287 tb->fastreuse = 1;
288 else
289 tb->fastreuse = 0;
290 } else if (tb->fastreuse &&
291 (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
292 tb->fastreuse = 0;
293success:
294 if (!tcp_sk(sk)->bind_hash)
295 tcp_bind_hash(sk, tb, snum);
296 BUG_TRAP(tcp_sk(sk)->bind_hash == tb);
297 ret = 0;
298
299fail_unlock:
300 spin_unlock(&head->lock);
301fail:
302 local_bh_enable();
303 return ret;
304}
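/* Editorial sketch (not part of the original file): the heart of the
 * snum == 0 branch above is a simple rover walk over the local port range.
 * Stripped of locking and hash-chain details it looks like this; the
 * port_in_use callback is a hypothetical stand-in for the tb_for_each()
 * scan of the bind hash chain.
 */
static int demo_pick_local_port(int low, int high, int *rover,
				int (*port_in_use)(int port))
{
	int remaining = (high - low) + 1;
	int port = (*rover < low) ? low : *rover;

	do {
		if (++port > high)
			port = low;		/* wrap back to the bottom */
		if (!port_in_use(port)) {
			*rover = port;		/* resume here next time */
			return port;
		}
	} while (--remaining > 0);

	return -1;				/* entire range exhausted */
}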
305
306/* Get rid of any references to a local port held by the
307 * given sock.
308 */
309static void __tcp_put_port(struct sock *sk)
310{
311 struct inet_sock *inet = inet_sk(sk);
312 struct tcp_bind_hashbucket *head = &tcp_bhash[tcp_bhashfn(inet->num)];
313 struct tcp_bind_bucket *tb;
314
315 spin_lock(&head->lock);
316 tb = tcp_sk(sk)->bind_hash;
317 __sk_del_bind_node(sk);
318 tcp_sk(sk)->bind_hash = NULL;
319 inet->num = 0;
320 tcp_bucket_destroy(tb);
321 spin_unlock(&head->lock);
322}
323
324void tcp_put_port(struct sock *sk)
325{
326 local_bh_disable();
327 __tcp_put_port(sk);
328 local_bh_enable();
329}
330
331/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP.
332 * Look, when several writers sleep and reader wakes them up, all but one
333 * immediately hit write lock and grab all the cpus. Exclusive sleep solves
334 * this, _but_ remember, it adds useless work on UP machines (wake up each
335 * exclusive lock release). It should be ifdefed really.
336 */
337
338void tcp_listen_wlock(void)
339{
340 write_lock(&tcp_lhash_lock);
341
342 if (atomic_read(&tcp_lhash_users)) {
343 DEFINE_WAIT(wait);
344
345 for (;;) {
346 prepare_to_wait_exclusive(&tcp_lhash_wait,
347 &wait, TASK_UNINTERRUPTIBLE);
348 if (!atomic_read(&tcp_lhash_users))
349 break;
350 write_unlock_bh(&tcp_lhash_lock);
351 schedule();
352 write_lock_bh(&tcp_lhash_lock);
353 }
354
355 finish_wait(&tcp_lhash_wait, &wait);
356 }
357}
358
359static __inline__ void __tcp_v4_hash(struct sock *sk, const int listen_possible)
360{
361 struct hlist_head *list;
362 rwlock_t *lock;
363
364 BUG_TRAP(sk_unhashed(sk));
365 if (listen_possible && sk->sk_state == TCP_LISTEN) {
366 list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
367 lock = &tcp_lhash_lock;
368 tcp_listen_wlock();
369 } else {
370 list = &tcp_ehash[(sk->sk_hashent = tcp_sk_hashfn(sk))].chain;
371 lock = &tcp_ehash[sk->sk_hashent].lock;
372 write_lock(lock);
373 }
374 __sk_add_node(sk, list);
375 sock_prot_inc_use(sk->sk_prot);
376 write_unlock(lock);
377 if (listen_possible && sk->sk_state == TCP_LISTEN)
378 wake_up(&tcp_lhash_wait);
379}
380
381static void tcp_v4_hash(struct sock *sk)
382{
383 if (sk->sk_state != TCP_CLOSE) {
384 local_bh_disable();
385 __tcp_v4_hash(sk, 1);
386 local_bh_enable();
387 }
388}
389
390void tcp_unhash(struct sock *sk)
391{
392 rwlock_t *lock;
393
394 if (sk_unhashed(sk))
395 goto ende;
396
397 if (sk->sk_state == TCP_LISTEN) {
398 local_bh_disable();
399 tcp_listen_wlock();
400 lock = &tcp_lhash_lock;
401 } else {
402 struct tcp_ehash_bucket *head = &tcp_ehash[sk->sk_hashent];
403 lock = &head->lock;
404 write_lock_bh(&head->lock);
405 }
406
407 if (__sk_del_node_init(sk))
408 sock_prot_dec_use(sk->sk_prot);
409 write_unlock_bh(lock);
410
411 ende:
412 if (sk->sk_state == TCP_LISTEN)
413 wake_up(&tcp_lhash_wait);
414}
415
416/* Don't inline this cruft. There are some nice properties to
417 * exploit here. The BSD API does not allow a listening TCP
418 * to specify the remote port nor the remote address for the
419 * connection. So always assume those are both wildcarded
420 * during the search since they can never be otherwise.
421 */
422static struct sock *__tcp_v4_lookup_listener(struct hlist_head *head, u32 daddr,
423 unsigned short hnum, int dif)
424{
425 struct sock *result = NULL, *sk;
426 struct hlist_node *node;
427 int score, hiscore;
428
429 hiscore=-1;
430 sk_for_each(sk, node, head) {
431 struct inet_sock *inet = inet_sk(sk);
432
433 if (inet->num == hnum && !ipv6_only_sock(sk)) {
434 __u32 rcv_saddr = inet->rcv_saddr;
435
436 score = (sk->sk_family == PF_INET ? 1 : 0);
437 if (rcv_saddr) {
438 if (rcv_saddr != daddr)
439 continue;
440 score+=2;
441 }
442 if (sk->sk_bound_dev_if) {
443 if (sk->sk_bound_dev_if != dif)
444 continue;
445 score+=2;
446 }
447 if (score == 5)
448 return sk;
449 if (score > hiscore) {
450 hiscore = score;
451 result = sk;
452 }
453 }
454 }
455 return result;
456}
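/* Editorial sketch (not part of the original file): the scoring used by
 * __tcp_v4_lookup_listener() above, pulled out on its own. A perfect score
 * of 5 (1 for an AF_INET socket, 2 for a matching bound address, 2 for a
 * matching bound device) ends the walk early; a negative value here stands
 * for "skip this socket".
 */
static int demo_listener_score(int is_af_inet, unsigned int rcv_saddr,
			       unsigned int daddr, int bound_dev_if, int dif)
{
	int score = is_af_inet ? 1 : 0;

	if (rcv_saddr) {
		if (rcv_saddr != daddr)
			return -1;	/* bound to some other local address */
		score += 2;
	}
	if (bound_dev_if) {
		if (bound_dev_if != dif)
			return -1;	/* bound to some other interface */
		score += 2;
	}
	return score;
}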
457
458/* Optimize the common listener case. */
459static inline struct sock *tcp_v4_lookup_listener(u32 daddr,
460 unsigned short hnum, int dif)
461{
462 struct sock *sk = NULL;
463 struct hlist_head *head;
464
465 read_lock(&tcp_lhash_lock);
466 head = &tcp_listening_hash[tcp_lhashfn(hnum)];
467 if (!hlist_empty(head)) {
468 struct inet_sock *inet = inet_sk((sk = __sk_head(head)));
469
470 if (inet->num == hnum && !sk->sk_node.next &&
471 (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
472 (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
473 !sk->sk_bound_dev_if)
474 goto sherry_cache;
475 sk = __tcp_v4_lookup_listener(head, daddr, hnum, dif);
476 }
477 if (sk) {
478sherry_cache:
479 sock_hold(sk);
480 }
481 read_unlock(&tcp_lhash_lock);
482 return sk;
483}
484
485/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
486 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
487 *
488 * Local BH must be disabled here.
489 */
490
491static inline struct sock *__tcp_v4_lookup_established(u32 saddr, u16 sport,
492 u32 daddr, u16 hnum,
493 int dif)
494{
495 struct tcp_ehash_bucket *head;
496 TCP_V4_ADDR_COOKIE(acookie, saddr, daddr)
497 __u32 ports = TCP_COMBINED_PORTS(sport, hnum);
498 struct sock *sk;
499 struct hlist_node *node;
500 /* Optimize here for direct hit, only listening connections can
501 * have wildcards anyway.
502 */
503 int hash = tcp_hashfn(daddr, hnum, saddr, sport);
504 head = &tcp_ehash[hash];
505 read_lock(&head->lock);
506 sk_for_each(sk, node, &head->chain) {
507 if (TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif))
508 goto hit; /* You sunk my battleship! */
509 }
510
511 /* Must check for a TIME_WAIT'er before going to listener hash. */
512 sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) {
513 if (TCP_IPV4_TW_MATCH(sk, acookie, saddr, daddr, ports, dif))
514 goto hit;
515 }
516 sk = NULL;
517out:
518 read_unlock(&head->lock);
519 return sk;
520hit:
521 sock_hold(sk);
522 goto out;
523}
524
525static inline struct sock *__tcp_v4_lookup(u32 saddr, u16 sport,
526 u32 daddr, u16 hnum, int dif)
527{
528 struct sock *sk = __tcp_v4_lookup_established(saddr, sport,
529 daddr, hnum, dif);
530
531 return sk ? : tcp_v4_lookup_listener(daddr, hnum, dif);
532}
533
534inline struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr,
535 u16 dport, int dif)
536{
537 struct sock *sk;
538
539 local_bh_disable();
540 sk = __tcp_v4_lookup(saddr, sport, daddr, ntohs(dport), dif);
541 local_bh_enable();
542
543 return sk;
544}
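/* Editorial note (not part of the original file): callers hand
 * tcp_v4_lookup() the addresses and ports exactly as they appear in the
 * packet (network byte order); the ntohs() above produces the host-order
 * hnum that the __tcp_v4_lookup*() helpers expect. For example, the ICMP
 * error handler further below calls it as:
 *
 *	sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr,
 *			   th->source, tcp_v4_iif(skb));
 */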
545
546EXPORT_SYMBOL_GPL(tcp_v4_lookup);
547
548static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb)
549{
550 return secure_tcp_sequence_number(skb->nh.iph->daddr,
551 skb->nh.iph->saddr,
552 skb->h.th->dest,
553 skb->h.th->source);
554}
555
556/* called with local bh disabled */
557static int __tcp_v4_check_established(struct sock *sk, __u16 lport,
558 struct tcp_tw_bucket **twp)
559{
560 struct inet_sock *inet = inet_sk(sk);
561 u32 daddr = inet->rcv_saddr;
562 u32 saddr = inet->daddr;
563 int dif = sk->sk_bound_dev_if;
564 TCP_V4_ADDR_COOKIE(acookie, saddr, daddr)
565 __u32 ports = TCP_COMBINED_PORTS(inet->dport, lport);
566 int hash = tcp_hashfn(daddr, lport, saddr, inet->dport);
567 struct tcp_ehash_bucket *head = &tcp_ehash[hash];
568 struct sock *sk2;
569 struct hlist_node *node;
570 struct tcp_tw_bucket *tw;
571
572 write_lock(&head->lock);
573
574 /* Check TIME-WAIT sockets first. */
575 sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) {
576 tw = (struct tcp_tw_bucket *)sk2;
577
578 if (TCP_IPV4_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) {
579 struct tcp_sock *tp = tcp_sk(sk);
580
581 /* With PAWS, it is safe from the viewpoint
582 of data integrity. Even without PAWS it
583 is safe provided sequence spaces do not
584 overlap i.e. at data rates <= 80Mbit/sec.
585
586 Actually, the idea is close to VJ's one:
587 only the timestamp cache is held not per host,
588 but per port pair, and the TW bucket is used
589 as the state holder.
590
591 If TW bucket has been already destroyed we
592 fall back to VJ's scheme and use initial
593 timestamp retrieved from peer table.
594 */
595 if (tw->tw_ts_recent_stamp &&
596 (!twp || (sysctl_tcp_tw_reuse &&
597 xtime.tv_sec -
598 tw->tw_ts_recent_stamp > 1))) {
599 if ((tp->write_seq =
600 tw->tw_snd_nxt + 65535 + 2) == 0)
601 tp->write_seq = 1;
602 tp->rx_opt.ts_recent = tw->tw_ts_recent;
603 tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp;
604 sock_hold(sk2);
605 goto unique;
606 } else
607 goto not_unique;
608 }
609 }
610 tw = NULL;
611
612 /* And established part... */
613 sk_for_each(sk2, node, &head->chain) {
614 if (TCP_IPV4_MATCH(sk2, acookie, saddr, daddr, ports, dif))
615 goto not_unique;
616 }
617
618unique:
619 /* Must record num and sport now. Otherwise we will see
620 * a socket in the hash table with a funny identity. */
621 inet->num = lport;
622 inet->sport = htons(lport);
623 sk->sk_hashent = hash;
624 BUG_TRAP(sk_unhashed(sk));
625 __sk_add_node(sk, &head->chain);
626 sock_prot_inc_use(sk->sk_prot);
627 write_unlock(&head->lock);
628
629 if (twp) {
630 *twp = tw;
631 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
632 } else if (tw) {
633 /* Silly. Should hash-dance instead... */
634 tcp_tw_deschedule(tw);
635 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
636
637 tcp_tw_put(tw);
638 }
639
640 return 0;
641
642not_unique:
643 write_unlock(&head->lock);
644 return -EADDRNOTAVAIL;
645}
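/* Editorial note (not part of the original file): the write_seq chosen above
 * when recycling a TIME-WAIT bucket is a small worked offset:
 *
 *	new write_seq = tw_snd_nxt + 65535 + 2
 *
 * i.e. one maximum unscaled window plus two, so the new incarnation's
 * sequence numbers start beyond anything the old connection could still have
 * in flight. A result of 0 is remapped to 1, because write_seq == 0 is the
 * "pick an initial sequence number later" marker used by tcp_v4_connect().
 */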
646
647static inline u32 connect_port_offset(const struct sock *sk)
648{
649 const struct inet_sock *inet = inet_sk(sk);
650
651 return secure_tcp_port_ephemeral(inet->rcv_saddr, inet->daddr,
652 inet->dport);
653}
654
655/*
656 * Bind a port for a connect operation and hash it.
657 */
658static inline int tcp_v4_hash_connect(struct sock *sk)
659{
660 unsigned short snum = inet_sk(sk)->num;
661 struct tcp_bind_hashbucket *head;
662 struct tcp_bind_bucket *tb;
663 int ret;
664
665 if (!snum) {
666 int low = sysctl_local_port_range[0];
667 int high = sysctl_local_port_range[1];
668 int range = high - low;
669 int i;
670 int port;
671 static u32 hint;
672 u32 offset = hint + connect_port_offset(sk);
673 struct hlist_node *node;
674 struct tcp_tw_bucket *tw = NULL;
675
676 local_bh_disable();
677 for (i = 1; i <= range; i++) {
678 port = low + (i + offset) % range;
679 head = &tcp_bhash[tcp_bhashfn(port)];
680 spin_lock(&head->lock);
681
682 /* Does not bother with rcv_saddr checks,
683 * because the established check is already
684 * unique enough.
685 */
686 tb_for_each(tb, node, &head->chain) {
687 if (tb->port == port) {
688 BUG_TRAP(!hlist_empty(&tb->owners));
689 if (tb->fastreuse >= 0)
690 goto next_port;
691 if (!__tcp_v4_check_established(sk,
692 port,
693 &tw))
694 goto ok;
695 goto next_port;
696 }
697 }
698
699 tb = tcp_bucket_create(head, port);
700 if (!tb) {
701 spin_unlock(&head->lock);
702 break;
703 }
704 tb->fastreuse = -1;
705 goto ok;
706
707 next_port:
708 spin_unlock(&head->lock);
709 }
710 local_bh_enable();
711
712 return -EADDRNOTAVAIL;
713
714ok:
715 hint += i;
716
717 /* Head lock still held and bh's disabled */
718 tcp_bind_hash(sk, tb, port);
719 if (sk_unhashed(sk)) {
720 inet_sk(sk)->sport = htons(port);
721 __tcp_v4_hash(sk, 0);
722 }
723 spin_unlock(&head->lock);
724
725 if (tw) {
726 tcp_tw_deschedule(tw);
727 tcp_tw_put(tw);
728 }
729
730 ret = 0;
731 goto out;
732 }
733
734 head = &tcp_bhash[tcp_bhashfn(snum)];
735 tb = tcp_sk(sk)->bind_hash;
736 spin_lock_bh(&head->lock);
737 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
738 __tcp_v4_hash(sk, 0);
739 spin_unlock_bh(&head->lock);
740 return 0;
741 } else {
742 spin_unlock(&head->lock);
743 /* No definite answer... Walk to established hash table */
744 ret = __tcp_v4_check_established(sk, snum, NULL);
745out:
746 local_bh_enable();
747 return ret;
748 }
749}
750
751/* This will initiate an outgoing connection. */
752int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
753{
754 struct inet_sock *inet = inet_sk(sk);
755 struct tcp_sock *tp = tcp_sk(sk);
756 struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
757 struct rtable *rt;
758 u32 daddr, nexthop;
759 int tmp;
760 int err;
761
762 if (addr_len < sizeof(struct sockaddr_in))
763 return -EINVAL;
764
765 if (usin->sin_family != AF_INET)
766 return -EAFNOSUPPORT;
767
768 nexthop = daddr = usin->sin_addr.s_addr;
769 if (inet->opt && inet->opt->srr) {
770 if (!daddr)
771 return -EINVAL;
772 nexthop = inet->opt->faddr;
773 }
774
775 tmp = ip_route_connect(&rt, nexthop, inet->saddr,
776 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
777 IPPROTO_TCP,
778 inet->sport, usin->sin_port, sk);
779 if (tmp < 0)
780 return tmp;
781
782 if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
783 ip_rt_put(rt);
784 return -ENETUNREACH;
785 }
786
787 if (!inet->opt || !inet->opt->srr)
788 daddr = rt->rt_dst;
789
790 if (!inet->saddr)
791 inet->saddr = rt->rt_src;
792 inet->rcv_saddr = inet->saddr;
793
794 if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) {
795 /* Reset inherited state */
796 tp->rx_opt.ts_recent = 0;
797 tp->rx_opt.ts_recent_stamp = 0;
798 tp->write_seq = 0;
799 }
800
801 if (sysctl_tcp_tw_recycle &&
802 !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
803 struct inet_peer *peer = rt_get_peer(rt);
804
805 /* VJ's idea. We save the last timestamp seen from
806 * the destination in the peer table when entering state TIME-WAIT,
807 * and initialize rx_opt.ts_recent from it when trying a new connection.
808 */
809
810 if (peer && peer->tcp_ts_stamp + TCP_PAWS_MSL >= xtime.tv_sec) {
811 tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
812 tp->rx_opt.ts_recent = peer->tcp_ts;
813 }
814 }
815
816 inet->dport = usin->sin_port;
817 inet->daddr = daddr;
818
819 tp->ext_header_len = 0;
820 if (inet->opt)
821 tp->ext_header_len = inet->opt->optlen;
822
823 tp->rx_opt.mss_clamp = 536;
824
825 /* Socket identity is still unknown (sport may be zero).
826 * However, we set the state to SYN-SENT and, without releasing the socket
827 * lock, select a source port, enter ourselves into the hash tables and
828 * complete initialization after this.
829 */
830 tcp_set_state(sk, TCP_SYN_SENT);
831 err = tcp_v4_hash_connect(sk);
832 if (err)
833 goto failure;
834
835 err = ip_route_newports(&rt, inet->sport, inet->dport, sk);
836 if (err)
837 goto failure;
838
839 /* OK, now commit destination to socket. */
840 __sk_dst_set(sk, &rt->u.dst);
841 tcp_v4_setup_caps(sk, &rt->u.dst);
842
843 if (!tp->write_seq)
844 tp->write_seq = secure_tcp_sequence_number(inet->saddr,
845 inet->daddr,
846 inet->sport,
847 usin->sin_port);
848
849 inet->id = tp->write_seq ^ jiffies;
850
851 err = tcp_connect(sk);
852 rt = NULL;
853 if (err)
854 goto failure;
855
856 return 0;
857
858failure:
859 /* This unhashes the socket and releases the local port, if necessary. */
860 tcp_set_state(sk, TCP_CLOSE);
861 ip_rt_put(rt);
862 sk->sk_route_caps = 0;
863 inet->dport = 0;
864 return err;
865}
866
867static __inline__ int tcp_v4_iif(struct sk_buff *skb)
868{
869 return ((struct rtable *)skb->dst)->rt_iif;
870}
871
872static __inline__ u32 tcp_v4_synq_hash(u32 raddr, u16 rport, u32 rnd)
873{
874 return (jhash_2words(raddr, (u32) rport, rnd) & (TCP_SYNQ_HSIZE - 1));
875}
876
877static struct request_sock *tcp_v4_search_req(struct tcp_sock *tp,
878 struct request_sock ***prevp,
879 __u16 rport,
880 __u32 raddr, __u32 laddr)
881{
882 struct listen_sock *lopt = tp->accept_queue.listen_opt;
883 struct request_sock *req, **prev;
884
885 for (prev = &lopt->syn_table[tcp_v4_synq_hash(raddr, rport, lopt->hash_rnd)];
886 (req = *prev) != NULL;
887 prev = &req->dl_next) {
888 const struct inet_request_sock *ireq = inet_rsk(req);
889
890 if (ireq->rmt_port == rport &&
891 ireq->rmt_addr == raddr &&
892 ireq->loc_addr == laddr &&
893 TCP_INET_FAMILY(req->rsk_ops->family)) {
894 BUG_TRAP(!req->sk);
895 *prevp = prev;
896 break;
897 }
898 }
899
900 return req;
901}
902
903static void tcp_v4_synq_add(struct sock *sk, struct request_sock *req)
904{
905 struct tcp_sock *tp = tcp_sk(sk);
906 struct listen_sock *lopt = tp->accept_queue.listen_opt;
907 u32 h = tcp_v4_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
908
909 reqsk_queue_hash_req(&tp->accept_queue, h, req, TCP_TIMEOUT_INIT);
910 tcp_synq_added(sk);
911}
912
913
914/*
915 * This routine does path mtu discovery as defined in RFC1191.
916 */
917static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *iph,
918 u32 mtu)
919{
920 struct dst_entry *dst;
921 struct inet_sock *inet = inet_sk(sk);
922 struct tcp_sock *tp = tcp_sk(sk);
923
924 /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
925 * sent out by Linux are always < 576 bytes so they should go through
926 * unfragmented).
927 */
928 if (sk->sk_state == TCP_LISTEN)
929 return;
930
931 /* We don't check in the dst entry if pmtu discovery is forbidden
932 * on this route. We just assume that no packet-too-big packets
933 * are sent back when pmtu discovery is not active.
934 * There is a small race when the user changes this flag in the
935 * route, but I think that's acceptable.
936 */
937 if ((dst = __sk_dst_check(sk, 0)) == NULL)
938 return;
939
940 dst->ops->update_pmtu(dst, mtu);
941
942 /* Something is about to go wrong... Remember the soft error
943 * for the case that this connection will not be able to recover.
944 */
945 if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
946 sk->sk_err_soft = EMSGSIZE;
947
948 mtu = dst_mtu(dst);
949
950 if (inet->pmtudisc != IP_PMTUDISC_DONT &&
951 tp->pmtu_cookie > mtu) {
952 tcp_sync_mss(sk, mtu);
953
954 /* Resend the TCP packet because it's
955 * clear that the old packet has been
956 * dropped. This is the new "fast" path mtu
957 * discovery.
958 */
959 tcp_simple_retransmit(sk);
960 } /* else let the usual retransmit timer handle it */
961}
962
963/*
964 * This routine is called by the ICMP module when it gets some
965 * sort of error condition. If err < 0 then the socket should
966 * be closed and the error returned to the user. If err > 0
967 * it's just the icmp type << 8 | icmp code. After adjustment
968 * header points to the first 8 bytes of the tcp header. We need
969 * to find the appropriate port.
970 *
971 * The locking strategy used here is very "optimistic". When
972 * someone else accesses the socket the ICMP is just dropped
973 * and for some paths there is no check at all.
974 * A more general error queue to queue errors for later handling
975 * is probably better.
976 *
977 */
978
979void tcp_v4_err(struct sk_buff *skb, u32 info)
980{
981 struct iphdr *iph = (struct iphdr *)skb->data;
982 struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
983 struct tcp_sock *tp;
984 struct inet_sock *inet;
985 int type = skb->h.icmph->type;
986 int code = skb->h.icmph->code;
987 struct sock *sk;
988 __u32 seq;
989 int err;
990
991 if (skb->len < (iph->ihl << 2) + 8) {
992 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
993 return;
994 }
995
996 sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr,
997 th->source, tcp_v4_iif(skb));
998 if (!sk) {
999 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
1000 return;
1001 }
1002 if (sk->sk_state == TCP_TIME_WAIT) {
1003 tcp_tw_put((struct tcp_tw_bucket *)sk);
1004 return;
1005 }
1006
1007 bh_lock_sock(sk);
1008 /* If too many ICMPs get dropped on busy
1009 * servers this needs to be solved differently.
1010 */
1011 if (sock_owned_by_user(sk))
1012 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
1013
1014 if (sk->sk_state == TCP_CLOSE)
1015 goto out;
1016
1017 tp = tcp_sk(sk);
1018 seq = ntohl(th->seq);
1019 if (sk->sk_state != TCP_LISTEN &&
1020 !between(seq, tp->snd_una, tp->snd_nxt)) {
1021 NET_INC_STATS(LINUX_MIB_OUTOFWINDOWICMPS);
1022 goto out;
1023 }
1024
1025 switch (type) {
1026 case ICMP_SOURCE_QUENCH:
1027 /* Just silently ignore these. */
1028 goto out;
1029 case ICMP_PARAMETERPROB:
1030 err = EPROTO;
1031 break;
1032 case ICMP_DEST_UNREACH:
1033 if (code > NR_ICMP_UNREACH)
1034 goto out;
1035
1036 if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
1037 if (!sock_owned_by_user(sk))
1038 do_pmtu_discovery(sk, iph, info);
1039 goto out;
1040 }
1041
1042 err = icmp_err_convert[code].errno;
1043 break;
1044 case ICMP_TIME_EXCEEDED:
1045 err = EHOSTUNREACH;
1046 break;
1047 default:
1048 goto out;
1049 }
1050
1051 switch (sk->sk_state) {
1052 struct request_sock *req, **prev;
1053 case TCP_LISTEN:
1054 if (sock_owned_by_user(sk))
1055 goto out;
1056
1057 req = tcp_v4_search_req(tp, &prev, th->dest,
1058 iph->daddr, iph->saddr);
1059 if (!req)
1060 goto out;
1061
1062 /* ICMPs are not backlogged, hence we cannot get
1063 an established socket here.
1064 */
1065 BUG_TRAP(!req->sk);
1066
1067 if (seq != tcp_rsk(req)->snt_isn) {
1068 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
1069 goto out;
1070 }
1071
1072 /*
1073 * Still in SYN_RECV, just remove it silently.
1074 * There is no good way to pass the error to the newly
1075 * created socket, and POSIX does not want network
1076 * errors returned from accept().
1077 */
1078 tcp_synq_drop(sk, req, prev);
1079 goto out;
1080
1081 case TCP_SYN_SENT:
1082 case TCP_SYN_RECV: /* Cannot happen.
1083 It can, f.e., if SYNs crossed.
1084 */
1085 if (!sock_owned_by_user(sk)) {
1086 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
1087 sk->sk_err = err;
1088
1089 sk->sk_error_report(sk);
1090
1091 tcp_done(sk);
1092 } else {
1093 sk->sk_err_soft = err;
1094 }
1095 goto out;
1096 }
1097
1098 /* If we've already connected we will keep trying
1099 * until we time out, or the user gives up.
1100 *
1101 * rfc1122 4.2.3.9 allows to consider as hard errors
1102 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
1103 * but it is obsoleted by pmtu discovery).
1104 *
1105 * Note, that in modern internet, where routing is unreliable
1106 * and in each dark corner broken firewalls sit, sending random
1107 * errors ordered by their masters, even these two messages finally lose
1108 * their original sense (even Linux sends invalid PORT_UNREACHs)
1109 *
1110 * Now we are in compliance with RFCs.
1111 * --ANK (980905)
1112 */
1113
1114 inet = inet_sk(sk);
1115 if (!sock_owned_by_user(sk) && inet->recverr) {
1116 sk->sk_err = err;
1117 sk->sk_error_report(sk);
1118 } else { /* Only an error on timeout */
1119 sk->sk_err_soft = err;
1120 }
1121
1122out:
1123 bh_unlock_sock(sk);
1124 sock_put(sk);
1125}
1126
1127/* This routine computes an IPv4 TCP checksum. */
1128void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len,
1129 struct sk_buff *skb)
1130{
1131 struct inet_sock *inet = inet_sk(sk);
1132
1133 if (skb->ip_summed == CHECKSUM_HW) {
1134 th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0);
1135 skb->csum = offsetof(struct tcphdr, check);
1136 } else {
1137 th->check = tcp_v4_check(th, len, inet->saddr, inet->daddr,
1138 csum_partial((char *)th,
1139 th->doff << 2,
1140 skb->csum));
1141 }
1142}
1143
1144/*
1145 * This routine will send an RST to the other tcp.
1146 *
1147 * Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
1148 * for reset.
1149 * Answer: if a packet caused an RST, it is not for a socket
1150 * existing in our system; if it is matched to a socket,
1151 * it is just a duplicate segment or a bug in the other side's TCP.
1152 * So we build the reply based only on the parameters
1153 * that arrived with the segment.
1154 * Exception: precedence violation. We do not implement it in any case.
1155 */
1156
1157static void tcp_v4_send_reset(struct sk_buff *skb)
1158{
1159 struct tcphdr *th = skb->h.th;
1160 struct tcphdr rth;
1161 struct ip_reply_arg arg;
1162
1163 /* Never send a reset in response to a reset. */
1164 if (th->rst)
1165 return;
1166
1167 if (((struct rtable *)skb->dst)->rt_type != RTN_LOCAL)
1168 return;
1169
1170 /* Swap the send and the receive. */
1171 memset(&rth, 0, sizeof(struct tcphdr));
1172 rth.dest = th->source;
1173 rth.source = th->dest;
1174 rth.doff = sizeof(struct tcphdr) / 4;
1175 rth.rst = 1;
1176
1177 if (th->ack) {
1178 rth.seq = th->ack_seq;
1179 } else {
1180 rth.ack = 1;
1181 rth.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
1182 skb->len - (th->doff << 2));
1183 }
1184
1185 memset(&arg, 0, sizeof arg);
1186 arg.iov[0].iov_base = (unsigned char *)&rth;
1187 arg.iov[0].iov_len = sizeof rth;
1188 arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
1189 skb->nh.iph->saddr, /*XXX*/
1190 sizeof(struct tcphdr), IPPROTO_TCP, 0);
1191 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
1192
1193 ip_send_reply(tcp_socket->sk, skb, &arg, sizeof rth);
1194
1195 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1196 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
1197}
1198
1199/* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
1200 outside socket context, is certainly ugly. What can I do?
1201 */
1202
1203static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
1204 u32 win, u32 ts)
1205{
1206 struct tcphdr *th = skb->h.th;
1207 struct {
1208 struct tcphdr th;
1209 u32 tsopt[3];
1210 } rep;
1211 struct ip_reply_arg arg;
1212
1213 memset(&rep.th, 0, sizeof(struct tcphdr));
1214 memset(&arg, 0, sizeof arg);
1215
1216 arg.iov[0].iov_base = (unsigned char *)&rep;
1217 arg.iov[0].iov_len = sizeof(rep.th);
1218 if (ts) {
1219 rep.tsopt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1220 (TCPOPT_TIMESTAMP << 8) |
1221 TCPOLEN_TIMESTAMP);
1222 rep.tsopt[1] = htonl(tcp_time_stamp);
1223 rep.tsopt[2] = htonl(ts);
1224 arg.iov[0].iov_len = sizeof(rep);
1225 }
1226
1227 /* Swap the send and the receive. */
1228 rep.th.dest = th->source;
1229 rep.th.source = th->dest;
1230 rep.th.doff = arg.iov[0].iov_len / 4;
1231 rep.th.seq = htonl(seq);
1232 rep.th.ack_seq = htonl(ack);
1233 rep.th.ack = 1;
1234 rep.th.window = htons(win);
1235
1236 arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
1237 skb->nh.iph->saddr, /*XXX*/
1238 arg.iov[0].iov_len, IPPROTO_TCP, 0);
1239 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
1240
1241 ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);
1242
1243 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1244}
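/* Editorial note (not part of the original file): when 'ts' is non-zero,
 * tcp_v4_send_ack() above emits the standard RFC 1323 timestamp option,
 * padded to a 32-bit boundary. The first word packs four option bytes:
 *
 *	0x01 0x01 0x08 0x0a   NOP, NOP, TCPOPT_TIMESTAMP, TCPOLEN_TIMESTAMP
 *
 * followed by TSval (tsopt[1]) and TSecr (tsopt[2]), 12 bytes in total.
 */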
1245
1246static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
1247{
1248 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
1249
1250 tcp_v4_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt,
1251 tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent);
1252
1253 tcp_tw_put(tw);
1254}
1255
1256static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
1257{
1258 tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
1259 req->ts_recent);
1260}
1261
1262static struct dst_entry* tcp_v4_route_req(struct sock *sk,
1263 struct request_sock *req)
1264{
1265 struct rtable *rt;
1266 const struct inet_request_sock *ireq = inet_rsk(req);
1267 struct ip_options *opt = inet_rsk(req)->opt;
1268 struct flowi fl = { .oif = sk->sk_bound_dev_if,
1269 .nl_u = { .ip4_u =
1270 { .daddr = ((opt && opt->srr) ?
1271 opt->faddr :
1272 ireq->rmt_addr),
1273 .saddr = ireq->loc_addr,
1274 .tos = RT_CONN_FLAGS(sk) } },
1275 .proto = IPPROTO_TCP,
1276 .uli_u = { .ports =
1277 { .sport = inet_sk(sk)->sport,
1278 .dport = ireq->rmt_port } } };
1279
1280 if (ip_route_output_flow(&rt, &fl, sk, 0)) {
1281 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
1282 return NULL;
1283 }
1284 if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) {
1285 ip_rt_put(rt);
1286 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
1287 return NULL;
1288 }
1289 return &rt->u.dst;
1290}
1291
1292/*
1293 * Send a SYN-ACK after having received an ACK.
1294 * This still operates on a request_sock only, not on a big
1295 * socket.
1296 */
1297static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
1298 struct dst_entry *dst)
1299{
1300 const struct inet_request_sock *ireq = inet_rsk(req);
1301 int err = -1;
1302 struct sk_buff * skb;
1303
1304 /* First, grab a route. */
1305 if (!dst && (dst = tcp_v4_route_req(sk, req)) == NULL)
1306 goto out;
1307
1308 skb = tcp_make_synack(sk, dst, req);
1309
1310 if (skb) {
1311 struct tcphdr *th = skb->h.th;
1312
1313 th->check = tcp_v4_check(th, skb->len,
1314 ireq->loc_addr,
1315 ireq->rmt_addr,
1316 csum_partial((char *)th, skb->len,
1317 skb->csum));
1318
1319 err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
1320 ireq->rmt_addr,
1321 ireq->opt);
1322 if (err == NET_XMIT_CN)
1323 err = 0;
1324 }
1325
1326out:
1327 dst_release(dst);
1328 return err;
1329}
1330
1331/*
1332 * IPv4 request_sock destructor.
1333 */
1334static void tcp_v4_reqsk_destructor(struct request_sock *req)
1335{
1336 if (inet_rsk(req)->opt)
1337 kfree(inet_rsk(req)->opt);
1338}
1339
1340static inline void syn_flood_warning(struct sk_buff *skb)
1341{
1342 static unsigned long warntime;
1343
1344 if (time_after(jiffies, (warntime + HZ * 60))) {
1345 warntime = jiffies;
1346 printk(KERN_INFO
1347 "possible SYN flooding on port %d. Sending cookies.\n",
1348 ntohs(skb->h.th->dest));
1349 }
1350}
1351
1352/*
1353 * Save and compile IPv4 options into the request_sock if needed.
1354 */
1355static inline struct ip_options *tcp_v4_save_options(struct sock *sk,
1356 struct sk_buff *skb)
1357{
1358 struct ip_options *opt = &(IPCB(skb)->opt);
1359 struct ip_options *dopt = NULL;
1360
1361 if (opt && opt->optlen) {
1362 int opt_size = optlength(opt);
1363 dopt = kmalloc(opt_size, GFP_ATOMIC);
1364 if (dopt) {
1365 if (ip_options_echo(dopt, skb)) {
1366 kfree(dopt);
1367 dopt = NULL;
1368 }
1369 }
1370 }
1371 return dopt;
1372}
1373
1374struct request_sock_ops tcp_request_sock_ops = {
1375 .family = PF_INET,
1376 .obj_size = sizeof(struct tcp_request_sock),
1377 .rtx_syn_ack = tcp_v4_send_synack,
1378 .send_ack = tcp_v4_reqsk_send_ack,
1379 .destructor = tcp_v4_reqsk_destructor,
1380 .send_reset = tcp_v4_send_reset,
1381};
1382
1383int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1384{
1385 struct inet_request_sock *ireq;
1386 struct tcp_options_received tmp_opt;
1387 struct request_sock *req;
1388 __u32 saddr = skb->nh.iph->saddr;
1389 __u32 daddr = skb->nh.iph->daddr;
1390 __u32 isn = TCP_SKB_CB(skb)->when;
1391 struct dst_entry *dst = NULL;
1392#ifdef CONFIG_SYN_COOKIES
1393 int want_cookie = 0;
1394#else
1395#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
1396#endif
1397
1398 /* Never answer SYNs sent to broadcast or multicast addresses */
1399 if (((struct rtable *)skb->dst)->rt_flags &
1400 (RTCF_BROADCAST | RTCF_MULTICAST))
1401 goto drop;
1402
1403 /* TW buckets are converted to open requests without
1404 * limitations; they conserve resources and the peer is
1405 * evidently a real one.
1406 */
1407 if (tcp_synq_is_full(sk) && !isn) {
1408#ifdef CONFIG_SYN_COOKIES
1409 if (sysctl_tcp_syncookies) {
1410 want_cookie = 1;
1411 } else
1412#endif
1413 goto drop;
1414 }
1415
1416 /* Accept backlog is full. If we have already queued enough
1417 * warm entries in the syn queue, drop the request. It is better than
1418 * clogging syn queue with openreqs with exponentially increasing
1419 * timeout.
1420 */
1421 if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
1422 goto drop;
1423
1424 req = reqsk_alloc(&tcp_request_sock_ops);
1425 if (!req)
1426 goto drop;
1427
1428 tcp_clear_options(&tmp_opt);
1429 tmp_opt.mss_clamp = 536;
1430 tmp_opt.user_mss = tcp_sk(sk)->rx_opt.user_mss;
1431
1432 tcp_parse_options(skb, &tmp_opt, 0);
1433
1434 if (want_cookie) {
1435 tcp_clear_options(&tmp_opt);
1436 tmp_opt.saw_tstamp = 0;
1437 }
1438
1439 if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) {
1440 /* Some OSes (unknown ones, but I see them on a web server which
1441 * contains information interesting only for Windows
1442 * users) do not send their stamp in the SYN. It is an easy case:
1443 * we simply do not advertise TS support.
1444 */
1445 tmp_opt.saw_tstamp = 0;
1446 tmp_opt.tstamp_ok = 0;
1447 }
1448 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1449
1450 tcp_openreq_init(req, &tmp_opt, skb);
1451
1452 ireq = inet_rsk(req);
1453 ireq->loc_addr = daddr;
1454 ireq->rmt_addr = saddr;
1455 ireq->opt = tcp_v4_save_options(sk, skb);
1456 if (!want_cookie)
1457 TCP_ECN_create_request(req, skb->h.th);
1458
1459 if (want_cookie) {
1460#ifdef CONFIG_SYN_COOKIES
1461 syn_flood_warning(skb);
1462#endif
1463 isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1464 } else if (!isn) {
1465 struct inet_peer *peer = NULL;
1466
1467 /* VJ's idea. We save last timestamp seen
1468 * from the destination in peer table, when entering
1469 * state TIME-WAIT, and check against it before
1470 * accepting new connection request.
1471 *
1472 * If "isn" is not zero, this request hit alive
1473 * timewait bucket, so that all the necessary checks
1474 * are made in the function processing timewait state.
1475 */
1476 if (tmp_opt.saw_tstamp &&
1477 sysctl_tcp_tw_recycle &&
1478 (dst = tcp_v4_route_req(sk, req)) != NULL &&
1479 (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
1480 peer->v4daddr == saddr) {
1481 if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
1482 (s32)(peer->tcp_ts - req->ts_recent) >
1483 TCP_PAWS_WINDOW) {
1484 NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED);
1485 dst_release(dst);
1486 goto drop_and_free;
1487 }
1488 }
1489 /* Kill the following clause, if you dislike this way. */
1490 else if (!sysctl_tcp_syncookies &&
1491 (sysctl_max_syn_backlog - tcp_synq_len(sk) <
1492 (sysctl_max_syn_backlog >> 2)) &&
1493 (!peer || !peer->tcp_ts_stamp) &&
1494 (!dst || !dst_metric(dst, RTAX_RTT))) {
1495 /* Without syncookies last quarter of
1496 * backlog is filled with destinations,
1497 * proven to be alive.
1498 * It means that we continue to communicate
1499 * with destinations already remembered
1500 * at the moment of the synflood.
1501 */
1502 LIMIT_NETDEBUG(printk(KERN_DEBUG "TCP: drop open "
1503 "request from %u.%u."
1504 "%u.%u/%u\n",
1505 NIPQUAD(saddr),
1506 ntohs(skb->h.th->source)));
1507 dst_release(dst);
1508 goto drop_and_free;
1509 }
1510
1511 isn = tcp_v4_init_sequence(sk, skb);
1512 }
1513 tcp_rsk(req)->snt_isn = isn;
1514
1515 if (tcp_v4_send_synack(sk, req, dst))
1516 goto drop_and_free;
1517
1518 if (want_cookie) {
1519 reqsk_free(req);
1520 } else {
1521 tcp_v4_synq_add(sk, req);
1522 }
1523 return 0;
1524
1525drop_and_free:
1526 reqsk_free(req);
1527drop:
1528 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
1529 return 0;
1530}
1531
1532
1533/*
1534 * The three way handshake has completed - we got a valid synack -
1535 * now create the new socket.
1536 */
1537struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1538 struct request_sock *req,
1539 struct dst_entry *dst)
1540{
1541 struct inet_request_sock *ireq;
1542 struct inet_sock *newinet;
1543 struct tcp_sock *newtp;
1544 struct sock *newsk;
1545
1546 if (sk_acceptq_is_full(sk))
1547 goto exit_overflow;
1548
1549 if (!dst && (dst = tcp_v4_route_req(sk, req)) == NULL)
1550 goto exit;
1551
1552 newsk = tcp_create_openreq_child(sk, req, skb);
1553 if (!newsk)
1554 goto exit;
1555
1556 newsk->sk_dst_cache = dst;
1557 tcp_v4_setup_caps(newsk, dst);
1558
1559 newtp = tcp_sk(newsk);
1560 newinet = inet_sk(newsk);
1561 ireq = inet_rsk(req);
1562 newinet->daddr = ireq->rmt_addr;
1563 newinet->rcv_saddr = ireq->loc_addr;
1564 newinet->saddr = ireq->loc_addr;
1565 newinet->opt = ireq->opt;
1566 ireq->opt = NULL;
1567 newinet->mc_index = tcp_v4_iif(skb);
1568 newinet->mc_ttl = skb->nh.iph->ttl;
1569 newtp->ext_header_len = 0;
1570 if (newinet->opt)
1571 newtp->ext_header_len = newinet->opt->optlen;
1572 newinet->id = newtp->write_seq ^ jiffies;
1573
1574 tcp_sync_mss(newsk, dst_mtu(dst));
1575 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1576 tcp_initialize_rcv_mss(newsk);
1577
1578 __tcp_v4_hash(newsk, 0);
1579 __tcp_inherit_port(sk, newsk);
1580
1581 return newsk;
1582
1583exit_overflow:
1584 NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
1585exit:
1586 NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
1587 dst_release(dst);
1588 return NULL;
1589}
1590
1591static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1592{
1593 struct tcphdr *th = skb->h.th;
1594 struct iphdr *iph = skb->nh.iph;
1595 struct tcp_sock *tp = tcp_sk(sk);
1596 struct sock *nsk;
1597 struct request_sock **prev;
1598 /* Find possible connection requests. */
1599 struct request_sock *req = tcp_v4_search_req(tp, &prev, th->source,
1600 iph->saddr, iph->daddr);
1601 if (req)
1602 return tcp_check_req(sk, skb, req, prev);
1603
1604 nsk = __tcp_v4_lookup_established(skb->nh.iph->saddr,
1605 th->source,
1606 skb->nh.iph->daddr,
1607 ntohs(th->dest),
1608 tcp_v4_iif(skb));
1609
1610 if (nsk) {
1611 if (nsk->sk_state != TCP_TIME_WAIT) {
1612 bh_lock_sock(nsk);
1613 return nsk;
1614 }
1615 tcp_tw_put((struct tcp_tw_bucket *)nsk);
1616 return NULL;
1617 }
1618
1619#ifdef CONFIG_SYN_COOKIES
1620 if (!th->rst && !th->syn && th->ack)
1621 sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1622#endif
1623 return sk;
1624}
1625
1626static int tcp_v4_checksum_init(struct sk_buff *skb)
1627{
1628 if (skb->ip_summed == CHECKSUM_HW) {
1629 skb->ip_summed = CHECKSUM_UNNECESSARY;
1630 if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr,
1631 skb->nh.iph->daddr, skb->csum))
1632 return 0;
1633
1634 LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v4 csum failed\n"));
1635 skb->ip_summed = CHECKSUM_NONE;
1636 }
1637 if (skb->len <= 76) {
1638 if (tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr,
1639 skb->nh.iph->daddr,
1640 skb_checksum(skb, 0, skb->len, 0)))
1641 return -1;
1642 skb->ip_summed = CHECKSUM_UNNECESSARY;
1643 } else {
1644 skb->csum = ~tcp_v4_check(skb->h.th, skb->len,
1645 skb->nh.iph->saddr,
1646 skb->nh.iph->daddr, 0);
1647 }
1648 return 0;
1649}
1650
1651
1652/* The socket must have its spinlock held when we get
1653 * here.
1654 *
1655 * We have a potential double-lock case here, so even when
1656 * doing backlog processing we use the BH locking scheme.
1657 * This is because we cannot sleep with the original spinlock
1658 * held.
1659 */
1660int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1661{
1662 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1663 TCP_CHECK_TIMER(sk);
1664 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1665 goto reset;
1666 TCP_CHECK_TIMER(sk);
1667 return 0;
1668 }
1669
1670 if (skb->len < (skb->h.th->doff << 2) || tcp_checksum_complete(skb))
1671 goto csum_err;
1672
1673 if (sk->sk_state == TCP_LISTEN) {
1674 struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1675 if (!nsk)
1676 goto discard;
1677
1678 if (nsk != sk) {
1679 if (tcp_child_process(sk, nsk, skb))
1680 goto reset;
1681 return 0;
1682 }
1683 }
1684
1685 TCP_CHECK_TIMER(sk);
1686 if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1687 goto reset;
1688 TCP_CHECK_TIMER(sk);
1689 return 0;
1690
1691reset:
1692 tcp_v4_send_reset(skb);
1693discard:
1694 kfree_skb(skb);
1695 /* Be careful here. If this function gets more complicated and
1696 * gcc suffers from register pressure on the x86, sk (in %ebx)
1697 * might be destroyed here. This current version compiles correctly,
1698 * but you have been warned.
1699 */
1700 return 0;
1701
1702csum_err:
1703 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1704 goto discard;
1705}
1706
1707/*
1708 * From tcp_input.c
1709 */
1710
1711int tcp_v4_rcv(struct sk_buff *skb)
1712{
1713 struct tcphdr *th;
1714 struct sock *sk;
1715 int ret;
1716
1717 if (skb->pkt_type != PACKET_HOST)
1718 goto discard_it;
1719
1720 /* Count it even if it's bad */
1721 TCP_INC_STATS_BH(TCP_MIB_INSEGS);
1722
1723 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1724 goto discard_it;
1725
1726 th = skb->h.th;
1727
1728 if (th->doff < sizeof(struct tcphdr) / 4)
1729 goto bad_packet;
1730 if (!pskb_may_pull(skb, th->doff * 4))
1731 goto discard_it;
1732
1733 /* An explanation is required here, I think.
1734 * Packet length and doff are validated by header prediction,
1735 * provided the case of th->doff==0 is eliminated.
1736 * So, we defer the checks. */
1737 if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1738 tcp_v4_checksum_init(skb) < 0))
1739 goto bad_packet;
1740
1741 th = skb->h.th;
1742 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1743 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1744 skb->len - th->doff * 4);
1745 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1746 TCP_SKB_CB(skb)->when = 0;
1747 TCP_SKB_CB(skb)->flags = skb->nh.iph->tos;
1748 TCP_SKB_CB(skb)->sacked = 0;
1749
1750 sk = __tcp_v4_lookup(skb->nh.iph->saddr, th->source,
1751 skb->nh.iph->daddr, ntohs(th->dest),
1752 tcp_v4_iif(skb));
1753
1754 if (!sk)
1755 goto no_tcp_socket;
1756
1757process:
1758 if (sk->sk_state == TCP_TIME_WAIT)
1759 goto do_time_wait;
1760
1761 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1762 goto discard_and_relse;
1763
1764 if (sk_filter(sk, skb, 0))
1765 goto discard_and_relse;
1766
1767 skb->dev = NULL;
1768
1769 bh_lock_sock(sk);
1770 ret = 0;
1771 if (!sock_owned_by_user(sk)) {
1772 if (!tcp_prequeue(sk, skb))
1773 ret = tcp_v4_do_rcv(sk, skb);
1774 } else
1775 sk_add_backlog(sk, skb);
1776 bh_unlock_sock(sk);
1777
1778 sock_put(sk);
1779
1780 return ret;
1781
1782no_tcp_socket:
1783 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1784 goto discard_it;
1785
1786 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1787bad_packet:
1788 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1789 } else {
1790 tcp_v4_send_reset(skb);
1791 }
1792
1793discard_it:
1794 /* Discard frame. */
1795 kfree_skb(skb);
1796 return 0;
1797
1798discard_and_relse:
1799 sock_put(sk);
1800 goto discard_it;
1801
1802do_time_wait:
1803 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1804 tcp_tw_put((struct tcp_tw_bucket *) sk);
1805 goto discard_it;
1806 }
1807
1808 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1809 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1810 tcp_tw_put((struct tcp_tw_bucket *) sk);
1811 goto discard_it;
1812 }
1813 switch (tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
1814 skb, th, skb->len)) {
1815 case TCP_TW_SYN: {
1816 struct sock *sk2 = tcp_v4_lookup_listener(skb->nh.iph->daddr,
1817 ntohs(th->dest),
1818 tcp_v4_iif(skb));
1819 if (sk2) {
1820 tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
1821 tcp_tw_put((struct tcp_tw_bucket *)sk);
1822 sk = sk2;
1823 goto process;
1824 }
1825 /* Fall through to ACK */
1826 }
1827 case TCP_TW_ACK:
1828 tcp_v4_timewait_ack(sk, skb);
1829 break;
1830 case TCP_TW_RST:
1831 goto no_tcp_socket;
1832 case TCP_TW_SUCCESS:;
1833 }
1834 goto discard_it;
1835}
1836
1837static int tcp_v4_reselect_saddr(struct sock *sk)
1838{
1839 struct inet_sock *inet = inet_sk(sk);
1840 int err;
1841 struct rtable *rt;
1842 __u32 old_saddr = inet->saddr;
1843 __u32 new_saddr;
1844 __u32 daddr = inet->daddr;
1845
1846 if (inet->opt && inet->opt->srr)
1847 daddr = inet->opt->faddr;
1848
1849 /* Query new route. */
1850 err = ip_route_connect(&rt, daddr, 0,
1851 RT_CONN_FLAGS(sk),
1852 sk->sk_bound_dev_if,
1853 IPPROTO_TCP,
1854 inet->sport, inet->dport, sk);
1855 if (err)
1856 return err;
1857
1858 __sk_dst_set(sk, &rt->u.dst);
1859 tcp_v4_setup_caps(sk, &rt->u.dst);
1860
1861 new_saddr = rt->rt_src;
1862
1863 if (new_saddr == old_saddr)
1864 return 0;
1865
1866 if (sysctl_ip_dynaddr > 1) {
1867 printk(KERN_INFO "tcp_v4_rebuild_header(): shifting inet->"
1868 "saddr from %d.%d.%d.%d to %d.%d.%d.%d\n",
1869 NIPQUAD(old_saddr),
1870 NIPQUAD(new_saddr));
1871 }
1872
1873 inet->saddr = new_saddr;
1874 inet->rcv_saddr = new_saddr;
1875
1876 /* XXX The only one ugly spot where we need to
1877 * XXX really change the sockets identity after
1878 * XXX it has entered the hashes. -DaveM
1879 *
1880 * Besides that, it does not check for connection
1881 * uniqueness. Wait for troubles.
1882 */
1883 __sk_prot_rehash(sk);
1884 return 0;
1885}
1886
1887int tcp_v4_rebuild_header(struct sock *sk)
1888{
1889 struct inet_sock *inet = inet_sk(sk);
1890 struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0);
1891 u32 daddr;
1892 int err;
1893
1894 /* Route is OK, nothing to do. */
1895 if (rt)
1896 return 0;
1897
1898 /* Reroute. */
1899 daddr = inet->daddr;
1900 if (inet->opt && inet->opt->srr)
1901 daddr = inet->opt->faddr;
1902
1903 {
1904 struct flowi fl = { .oif = sk->sk_bound_dev_if,
1905 .nl_u = { .ip4_u =
1906 { .daddr = daddr,
1907 .saddr = inet->saddr,
1908 .tos = RT_CONN_FLAGS(sk) } },
1909 .proto = IPPROTO_TCP,
1910 .uli_u = { .ports =
1911 { .sport = inet->sport,
1912 .dport = inet->dport } } };
1913
1914 err = ip_route_output_flow(&rt, &fl, sk, 0);
1915 }
1916 if (!err) {
1917 __sk_dst_set(sk, &rt->u.dst);
1918 tcp_v4_setup_caps(sk, &rt->u.dst);
1919 return 0;
1920 }
1921
1922 /* Routing failed... */
1923 sk->sk_route_caps = 0;
1924
1925 if (!sysctl_ip_dynaddr ||
1926 sk->sk_state != TCP_SYN_SENT ||
1927 (sk->sk_userlocks & SOCK_BINDADDR_LOCK) ||
1928 (err = tcp_v4_reselect_saddr(sk)) != 0)
1929 sk->sk_err_soft = -err;
1930
1931 return err;
1932}
1933
1934static void v4_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1935{
1936 struct sockaddr_in *sin = (struct sockaddr_in *) uaddr;
1937 struct inet_sock *inet = inet_sk(sk);
1938
1939 sin->sin_family = AF_INET;
1940 sin->sin_addr.s_addr = inet->daddr;
1941 sin->sin_port = inet->dport;
1942}
1943
1944/* VJ's idea. Save last timestamp seen from this destination
1945 * and hold it at least for normal timewait interval to use for duplicate
1946 * segment detection in subsequent connections, before they enter synchronized
1947 * state.
1948 */
1949
1950int tcp_v4_remember_stamp(struct sock *sk)
1951{
1952 struct inet_sock *inet = inet_sk(sk);
1953 struct tcp_sock *tp = tcp_sk(sk);
1954 struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
1955 struct inet_peer *peer = NULL;
1956 int release_it = 0;
1957
1958 if (!rt || rt->rt_dst != inet->daddr) {
1959 peer = inet_getpeer(inet->daddr, 1);
1960 release_it = 1;
1961 } else {
1962 if (!rt->peer)
1963 rt_bind_peer(rt, 1);
1964 peer = rt->peer;
1965 }
1966
1967 if (peer) {
1968 if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
1969 (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
1970 peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) {
1971 peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp;
1972 peer->tcp_ts = tp->rx_opt.ts_recent;
1973 }
1974 if (release_it)
1975 inet_putpeer(peer);
1976 return 1;
1977 }
1978
1979 return 0;
1980}
1981
1982int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw)
1983{
1984 struct inet_peer *peer = NULL;
1985
1986 peer = inet_getpeer(tw->tw_daddr, 1);
1987
1988 if (peer) {
1989 if ((s32)(peer->tcp_ts - tw->tw_ts_recent) <= 0 ||
1990 (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
1991 peer->tcp_ts_stamp <= tw->tw_ts_recent_stamp)) {
1992 peer->tcp_ts_stamp = tw->tw_ts_recent_stamp;
1993 peer->tcp_ts = tw->tw_ts_recent;
1994 }
1995 inet_putpeer(peer);
1996 return 1;
1997 }
1998
1999 return 0;
2000}
2001
2002struct tcp_func ipv4_specific = {
2003 .queue_xmit = ip_queue_xmit,
2004 .send_check = tcp_v4_send_check,
2005 .rebuild_header = tcp_v4_rebuild_header,
2006 .conn_request = tcp_v4_conn_request,
2007 .syn_recv_sock = tcp_v4_syn_recv_sock,
2008 .remember_stamp = tcp_v4_remember_stamp,
2009 .net_header_len = sizeof(struct iphdr),
2010 .setsockopt = ip_setsockopt,
2011 .getsockopt = ip_getsockopt,
2012 .addr2sockaddr = v4_addr2sockaddr,
2013 .sockaddr_len = sizeof(struct sockaddr_in),
2014};
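/* ipv4_specific is the address-family dependent half of the TCP code; it is
 * hung off tp->af_specific in tcp_v4_init_sock() below.  tcp_ipv6.c keeps
 * parallel tables with the same layout (ipv6_specific, plus ipv6_mapped for
 * v4-mapped addresses), which is what lets the protocol core stay
 * AF-independent.
 */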
2015
2016/* NOTE: Many fields are already zeroed by sk_alloc(), so they do not
2017 * need to be initialized here.
2018 */
2019static int tcp_v4_init_sock(struct sock *sk)
2020{
2021 struct tcp_sock *tp = tcp_sk(sk);
2022
2023 skb_queue_head_init(&tp->out_of_order_queue);
2024 tcp_init_xmit_timers(sk);
2025 tcp_prequeue_init(tp);
2026
2027 tp->rto = TCP_TIMEOUT_INIT;
2028 tp->mdev = TCP_TIMEOUT_INIT;
2029
2030 /* So many TCP implementations out there (incorrectly) count the
2031 * initial SYN frame in their delayed-ACK and congestion control
2032 * algorithms that we must have the following bandaid to talk
2033 * efficiently to them. -DaveM
2034 */
2035 tp->snd_cwnd = 2;
2036
2037 /* See draft-stevens-tcpca-spec-01 for discussion of the
2038 * initialization of these values.
2039 */
2040 tp->snd_ssthresh = 0x7fffffff; /* Infinity */
2041 tp->snd_cwnd_clamp = ~0;
2042	tp->mss_cache = 536;
2043
2044 tp->reordering = sysctl_tcp_reordering;
2045	tp->ca_ops = &tcp_init_congestion_ops;
2046
2047 sk->sk_state = TCP_CLOSE;
2048
2049 sk->sk_write_space = sk_stream_write_space;
2050 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
2051
2052 tp->af_specific = &ipv4_specific;
2053
2054 sk->sk_sndbuf = sysctl_tcp_wmem[1];
2055 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
2056
2057 atomic_inc(&tcp_sockets_allocated);
2058
2059 return 0;
2060}
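/* Roughly, a fresh socket leaves here with: a 3 second initial RTO
 * (TCP_TIMEOUT_INIT), a 2 segment congestion window, an effectively
 * unbounded ssthresh, and a conservative 536 byte MSS estimate that is
 * replaced once the route's MTU is known.  Congestion control starts on
 * tcp_init_congestion_ops until a real algorithm is selected, either when
 * the connection is established or via the TCP_CONGESTION socket option.
 */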
2061
2062int tcp_v4_destroy_sock(struct sock *sk)
2063{
2064 struct tcp_sock *tp = tcp_sk(sk);
2065
2066 tcp_clear_xmit_timers(sk);
2067
2068 tcp_cleanup_congestion_control(tp);
2069
2070	/* Clean up the write buffer. */
2071 sk_stream_writequeue_purge(sk);
2072
2073 /* Cleans up our, hopefully empty, out_of_order_queue. */
2074 __skb_queue_purge(&tp->out_of_order_queue);
2075
2076	/* Clean up the prequeue; it really should be empty already. */
2077 __skb_queue_purge(&tp->ucopy.prequeue);
2078
2079 /* Clean up a referenced TCP bind bucket. */
2080 if (tp->bind_hash)
2081 tcp_put_port(sk);
2082
2083	/*
2084	 * If a cached sendmsg page exists, free it.
2085	 */
2086 if (sk->sk_sndmsg_page) {
2087 __free_page(sk->sk_sndmsg_page);
2088 sk->sk_sndmsg_page = NULL;
2089 }
2090
2091 atomic_dec(&tcp_sockets_allocated);
2092
2093 return 0;
2094}
2095
2096EXPORT_SYMBOL(tcp_v4_destroy_sock);
2097
2098#ifdef CONFIG_PROC_FS
2099/* Proc filesystem TCP sock list dumping. */
2100
2101static inline struct tcp_tw_bucket *tw_head(struct hlist_head *head)
2102{
2103 return hlist_empty(head) ? NULL :
2104 list_entry(head->first, struct tcp_tw_bucket, tw_node);
2105}
2106
2107static inline struct tcp_tw_bucket *tw_next(struct tcp_tw_bucket *tw)
2108{
2109 return tw->tw_node.next ?
2110 hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
2111}
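/* tw_head()/tw_next() mirror sk_head()/sk_next() for the TIME_WAIT buckets,
 * which live in the second half of the established hash table
 * (tcp_ehash[bucket + tcp_ehash_size], as used below).
 */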
2112
2113static void *listening_get_next(struct seq_file *seq, void *cur)
2114{
2115 struct tcp_sock *tp;
2116 struct hlist_node *node;
2117 struct sock *sk = cur;
2118 struct tcp_iter_state* st = seq->private;
2119
2120 if (!sk) {
2121 st->bucket = 0;
2122 sk = sk_head(&tcp_listening_hash[0]);
2123 goto get_sk;
2124 }
2125
2126 ++st->num;
2127
2128 if (st->state == TCP_SEQ_STATE_OPENREQ) {
2129		struct request_sock *req = cur;
2130
2131 tp = tcp_sk(st->syn_wait_sk);
2132 req = req->dl_next;
2133 while (1) {
2134 while (req) {
2135				if (req->rsk_ops->family == st->family) {
2136 cur = req;
2137 goto out;
2138 }
2139 req = req->dl_next;
2140 }
2141 if (++st->sbucket >= TCP_SYNQ_HSIZE)
2142 break;
2143get_req:
2144			req = tp->accept_queue.listen_opt->syn_table[st->sbucket];
2145 }
2146 sk = sk_next(st->syn_wait_sk);
2147 st->state = TCP_SEQ_STATE_LISTENING;
2148		read_unlock_bh(&tp->accept_queue.syn_wait_lock);
2149 } else {
2150 tp = tcp_sk(sk);
2151 read_lock_bh(&tp->accept_queue.syn_wait_lock);
2152 if (reqsk_queue_len(&tp->accept_queue))
2153			goto start_req;
2154		read_unlock_bh(&tp->accept_queue.syn_wait_lock);
2155 sk = sk_next(sk);
2156 }
2157get_sk:
2158 sk_for_each_from(sk, node) {
2159 if (sk->sk_family == st->family) {
2160 cur = sk;
2161 goto out;
2162 }
2163 tp = tcp_sk(sk);
2164 read_lock_bh(&tp->accept_queue.syn_wait_lock);
2165 if (reqsk_queue_len(&tp->accept_queue)) {
2166start_req:
2167 st->uid = sock_i_uid(sk);
2168 st->syn_wait_sk = sk;
2169 st->state = TCP_SEQ_STATE_OPENREQ;
2170 st->sbucket = 0;
2171 goto get_req;
2172 }
2173		read_unlock_bh(&tp->accept_queue.syn_wait_lock);
2174 }
2175 if (++st->bucket < TCP_LHTABLE_SIZE) {
2176 sk = sk_head(&tcp_listening_hash[st->bucket]);
2177 goto get_sk;
2178 }
2179 cur = NULL;
2180out:
2181 return cur;
2182}
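/* The walk above is three levels deep: listening hash bucket -> listening
 * socket -> that socket's SYN queue (open requests).  While open requests
 * are being reported the iterator holds the listener's syn_wait_lock and
 * parks the listener in st->syn_wait_sk so the walk can resume there; the
 * lock is dropped again before moving on to the next listening socket.
 */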
2183
2184static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2185{
2186 void *rc = listening_get_next(seq, NULL);
2187
2188 while (rc && *pos) {
2189 rc = listening_get_next(seq, rc);
2190 --*pos;
2191 }
2192 return rc;
2193}
2194
2195static void *established_get_first(struct seq_file *seq)
2196{
2197 struct tcp_iter_state* st = seq->private;
2198 void *rc = NULL;
2199
2200 for (st->bucket = 0; st->bucket < tcp_ehash_size; ++st->bucket) {
2201 struct sock *sk;
2202 struct hlist_node *node;
2203 struct tcp_tw_bucket *tw;
2204
2205 /* We can reschedule _before_ having picked the target: */
2206 cond_resched_softirq();
2207
2208 read_lock(&tcp_ehash[st->bucket].lock);
2209 sk_for_each(sk, node, &tcp_ehash[st->bucket].chain) {
2210 if (sk->sk_family != st->family) {
2211 continue;
2212 }
2213 rc = sk;
2214 goto out;
2215 }
2216 st->state = TCP_SEQ_STATE_TIME_WAIT;
2217 tw_for_each(tw, node,
2218 &tcp_ehash[st->bucket + tcp_ehash_size].chain) {
2219 if (tw->tw_family != st->family) {
2220 continue;
2221 }
2222 rc = tw;
2223 goto out;
2224 }
2225 read_unlock(&tcp_ehash[st->bucket].lock);
2226 st->state = TCP_SEQ_STATE_ESTABLISHED;
2227 }
2228out:
2229 return rc;
2230}
2231
2232static void *established_get_next(struct seq_file *seq, void *cur)
2233{
2234 struct sock *sk = cur;
2235 struct tcp_tw_bucket *tw;
2236 struct hlist_node *node;
2237 struct tcp_iter_state* st = seq->private;
2238
2239 ++st->num;
2240
2241 if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2242 tw = cur;
2243 tw = tw_next(tw);
2244get_tw:
2245 while (tw && tw->tw_family != st->family) {
2246 tw = tw_next(tw);
2247 }
2248 if (tw) {
2249 cur = tw;
2250 goto out;
2251 }
2252 read_unlock(&tcp_ehash[st->bucket].lock);
2253 st->state = TCP_SEQ_STATE_ESTABLISHED;
2254
2255 /* We can reschedule between buckets: */
2256 cond_resched_softirq();
2257
2258 if (++st->bucket < tcp_ehash_size) {
2259 read_lock(&tcp_ehash[st->bucket].lock);
2260 sk = sk_head(&tcp_ehash[st->bucket].chain);
2261 } else {
2262 cur = NULL;
2263 goto out;
2264 }
2265 } else
2266 sk = sk_next(sk);
2267
2268 sk_for_each_from(sk, node) {
2269 if (sk->sk_family == st->family)
2270 goto found;
2271 }
2272
2273 st->state = TCP_SEQ_STATE_TIME_WAIT;
2274 tw = tw_head(&tcp_ehash[st->bucket + tcp_ehash_size].chain);
2275 goto get_tw;
2276found:
2277 cur = sk;
2278out:
2279 return cur;
2280}
2281
2282static void *established_get_idx(struct seq_file *seq, loff_t pos)
2283{
2284 void *rc = established_get_first(seq);
2285
2286 while (rc && pos) {
2287 rc = established_get_next(seq, rc);
2288 --pos;
2289 }
2290 return rc;
2291}
2292
2293static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2294{
2295 void *rc;
2296 struct tcp_iter_state* st = seq->private;
2297
2298 tcp_listen_lock();
2299 st->state = TCP_SEQ_STATE_LISTENING;
2300 rc = listening_get_idx(seq, &pos);
2301
2302 if (!rc) {
2303 tcp_listen_unlock();
2304 local_bh_disable();
2305 st->state = TCP_SEQ_STATE_ESTABLISHED;
2306 rc = established_get_idx(seq, pos);
2307 }
2308
2309 return rc;
2310}
2311
2312static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2313{
2314 struct tcp_iter_state* st = seq->private;
2315 st->state = TCP_SEQ_STATE_LISTENING;
2316 st->num = 0;
2317 return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2318}
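/* Standard seq_file convention: position 0 is reported as SEQ_START_TOKEN so
 * that ->show can emit the header line, and real entries start at *pos - 1.
 * st->num counts entries across both the listening and the
 * established/TIME_WAIT passes and becomes the "sl" column in the output.
 */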
2319
2320static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2321{
2322 void *rc = NULL;
2323 struct tcp_iter_state* st;
2324
2325 if (v == SEQ_START_TOKEN) {
2326 rc = tcp_get_idx(seq, 0);
2327 goto out;
2328 }
2329 st = seq->private;
2330
2331 switch (st->state) {
2332 case TCP_SEQ_STATE_OPENREQ:
2333 case TCP_SEQ_STATE_LISTENING:
2334 rc = listening_get_next(seq, v);
2335 if (!rc) {
2336 tcp_listen_unlock();
2337 local_bh_disable();
2338 st->state = TCP_SEQ_STATE_ESTABLISHED;
2339 rc = established_get_first(seq);
2340 }
2341 break;
2342 case TCP_SEQ_STATE_ESTABLISHED:
2343 case TCP_SEQ_STATE_TIME_WAIT:
2344 rc = established_get_next(seq, v);
2345 break;
2346 }
2347out:
2348 ++*pos;
2349 return rc;
2350}
2351
2352static void tcp_seq_stop(struct seq_file *seq, void *v)
2353{
2354 struct tcp_iter_state* st = seq->private;
2355
2356 switch (st->state) {
2357 case TCP_SEQ_STATE_OPENREQ:
2358 if (v) {
2359 struct tcp_sock *tp = tcp_sk(st->syn_wait_sk);
2360			read_unlock_bh(&tp->accept_queue.syn_wait_lock);
2361 }
2362 case TCP_SEQ_STATE_LISTENING:
2363 if (v != SEQ_START_TOKEN)
2364 tcp_listen_unlock();
2365 break;
2366 case TCP_SEQ_STATE_TIME_WAIT:
2367 case TCP_SEQ_STATE_ESTABLISHED:
2368 if (v)
2369 read_unlock(&tcp_ehash[st->bucket].lock);
2370 local_bh_enable();
2371 break;
2372 }
2373}
2374
2375static int tcp_seq_open(struct inode *inode, struct file *file)
2376{
2377 struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
2378 struct seq_file *seq;
2379 struct tcp_iter_state *s;
2380 int rc;
2381
2382 if (unlikely(afinfo == NULL))
2383 return -EINVAL;
2384
2385 s = kmalloc(sizeof(*s), GFP_KERNEL);
2386 if (!s)
2387 return -ENOMEM;
2388 memset(s, 0, sizeof(*s));
2389 s->family = afinfo->family;
2390 s->seq_ops.start = tcp_seq_start;
2391 s->seq_ops.next = tcp_seq_next;
2392 s->seq_ops.show = afinfo->seq_show;
2393 s->seq_ops.stop = tcp_seq_stop;
2394
2395 rc = seq_open(file, &s->seq_ops);
2396 if (rc)
2397 goto out_kfree;
2398 seq = file->private_data;
2399 seq->private = s;
2400out:
2401 return rc;
2402out_kfree:
2403 kfree(s);
2404 goto out;
2405}
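/* Each open of the proc file gets its own tcp_iter_state, reached through
 * seq->private; it is freed by seq_release_private(), which
 * tcp_proc_register() installs as the ->release handler below.
 */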
2406
2407int tcp_proc_register(struct tcp_seq_afinfo *afinfo)
2408{
2409 int rc = 0;
2410 struct proc_dir_entry *p;
2411
2412 if (!afinfo)
2413 return -EINVAL;
2414 afinfo->seq_fops->owner = afinfo->owner;
2415 afinfo->seq_fops->open = tcp_seq_open;
2416 afinfo->seq_fops->read = seq_read;
2417 afinfo->seq_fops->llseek = seq_lseek;
2418 afinfo->seq_fops->release = seq_release_private;
2419
2420 p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
2421 if (p)
2422 p->data = afinfo;
2423 else
2424 rc = -ENOMEM;
2425 return rc;
2426}
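/* tcp_proc_register() fills in the shared seq_file boilerplate and creates
 * the /proc/net/<name> entry.  tcp4_seq_afinfo below uses it to provide
 * /proc/net/tcp; tcp_ipv6.c registers a parallel "tcp6" instance through the
 * same helper, which is why it is exported.
 */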
2427
2428void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo)
2429{
2430 if (!afinfo)
2431 return;
2432 proc_net_remove(afinfo->name);
2433 memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
2434}
2435
2436static void get_openreq4(struct sock *sk, struct request_sock *req,
2437 char *tmpbuf, int i, int uid)
2438{
2439	const struct inet_request_sock *ireq = inet_rsk(req);
2440 int ttd = req->expires - jiffies;
2441
2442 sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
2443 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p",
2444 i,
2445		ireq->loc_addr,
2446		ntohs(inet_sk(sk)->sport),
2447 ireq->rmt_addr,
2448 ntohs(ireq->rmt_port),
2449 TCP_SYN_RECV,
2450 0, 0, /* could print option size, but that is af dependent. */
2451 1, /* timers active (only the expire timer) */
2452 jiffies_to_clock_t(ttd),
2453 req->retrans,
2454 uid,
2455 0, /* non standard timer */
2456 0, /* open_requests have no inode */
2457 atomic_read(&sk->sk_refcnt),
2458 req);
2459}
2460
2461static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
2462{
2463 int timer_active;
2464 unsigned long timer_expires;
2465 struct tcp_sock *tp = tcp_sk(sp);
2466 struct inet_sock *inet = inet_sk(sp);
2467 unsigned int dest = inet->daddr;
2468 unsigned int src = inet->rcv_saddr;
2469 __u16 destp = ntohs(inet->dport);
2470 __u16 srcp = ntohs(inet->sport);
2471
2472 if (tp->pending == TCP_TIME_RETRANS) {
2473 timer_active = 1;
2474 timer_expires = tp->timeout;
2475 } else if (tp->pending == TCP_TIME_PROBE0) {
2476 timer_active = 4;
2477 timer_expires = tp->timeout;
2478 } else if (timer_pending(&sp->sk_timer)) {
2479 timer_active = 2;
2480 timer_expires = sp->sk_timer.expires;
2481 } else {
2482 timer_active = 0;
2483 timer_expires = jiffies;
2484 }
2485
2486 sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2487 "%08X %5d %8d %lu %d %p %u %u %u %u %d",
2488 i, src, srcp, dest, destp, sp->sk_state,
2489 tp->write_seq - tp->snd_una, tp->rcv_nxt - tp->copied_seq,
2490 timer_active,
2491 jiffies_to_clock_t(timer_expires - jiffies),
2492 tp->retransmits,
2493 sock_i_uid(sp),
2494 tp->probes_out,
2495 sock_i_ino(sp),
2496 atomic_read(&sp->sk_refcnt), sp,
2497 tp->rto, tp->ack.ato, (tp->ack.quick << 1) | tp->ack.pingpong,
2498 tp->snd_cwnd,
2499 tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh);
2500}
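/* Addresses are printed as their raw network-byte-order 32-bit values while
 * ports go through ntohs(), so on a little-endian host 127.0.0.1:5432 shows
 * up as 0100007F:1538.  An established loopback socket row therefore looks
 * roughly like:
 *
 *    2: 0100007F:1538 0100007F:9D3C 01 00000000:00000000 00:00000000 ...
 *
 * followed by retransmits, uid, probes, inode, refcount, socket pointer and
 * the RTO/ATO/cwnd/ssthresh debugging fields.
 */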
2501
2502static void get_timewait4_sock(struct tcp_tw_bucket *tw, char *tmpbuf, int i)
2503{
2504 unsigned int dest, src;
2505 __u16 destp, srcp;
2506 int ttd = tw->tw_ttd - jiffies;
2507
2508 if (ttd < 0)
2509 ttd = 0;
2510
2511 dest = tw->tw_daddr;
2512 src = tw->tw_rcv_saddr;
2513 destp = ntohs(tw->tw_dport);
2514 srcp = ntohs(tw->tw_sport);
2515
2516 sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
2517 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p",
2518 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2519 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2520 atomic_read(&tw->tw_refcnt), tw);
2521}
2522
2523#define TMPSZ 150
2524
2525static int tcp4_seq_show(struct seq_file *seq, void *v)
2526{
2527 struct tcp_iter_state* st;
2528 char tmpbuf[TMPSZ + 1];
2529
2530 if (v == SEQ_START_TOKEN) {
2531 seq_printf(seq, "%-*s\n", TMPSZ - 1,
2532 " sl local_address rem_address st tx_queue "
2533 "rx_queue tr tm->when retrnsmt uid timeout "
2534 "inode");
2535 goto out;
2536 }
2537 st = seq->private;
2538
2539 switch (st->state) {
2540 case TCP_SEQ_STATE_LISTENING:
2541 case TCP_SEQ_STATE_ESTABLISHED:
2542 get_tcp4_sock(v, tmpbuf, st->num);
2543 break;
2544 case TCP_SEQ_STATE_OPENREQ:
2545 get_openreq4(st->syn_wait_sk, v, tmpbuf, st->num, st->uid);
2546 break;
2547 case TCP_SEQ_STATE_TIME_WAIT:
2548 get_timewait4_sock(v, tmpbuf, st->num);
2549 break;
2550 }
2551 seq_printf(seq, "%-*s\n", TMPSZ - 1, tmpbuf);
2552out:
2553 return 0;
2554}
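/* Every row is padded to a fixed TMPSZ - 1 characters ("%-*s"), so
 * /proc/net/tcp presents fixed-width records that simple parsers can consume
 * with offset arithmetic as well as line by line.
 */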
2555
2556static struct file_operations tcp4_seq_fops;
2557static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2558 .owner = THIS_MODULE,
2559 .name = "tcp",
2560 .family = AF_INET,
2561 .seq_show = tcp4_seq_show,
2562 .seq_fops = &tcp4_seq_fops,
2563};
2564
2565int __init tcp4_proc_init(void)
2566{
2567 return tcp_proc_register(&tcp4_seq_afinfo);
2568}
2569
2570void tcp4_proc_exit(void)
2571{
2572 tcp_proc_unregister(&tcp4_seq_afinfo);
2573}
2574#endif /* CONFIG_PROC_FS */
2575
2576struct proto tcp_prot = {
2577 .name = "TCP",
2578 .owner = THIS_MODULE,
2579 .close = tcp_close,
2580 .connect = tcp_v4_connect,
2581 .disconnect = tcp_disconnect,
2582 .accept = tcp_accept,
2583 .ioctl = tcp_ioctl,
2584 .init = tcp_v4_init_sock,
2585 .destroy = tcp_v4_destroy_sock,
2586 .shutdown = tcp_shutdown,
2587 .setsockopt = tcp_setsockopt,
2588 .getsockopt = tcp_getsockopt,
2589 .sendmsg = tcp_sendmsg,
2590 .recvmsg = tcp_recvmsg,
2591 .backlog_rcv = tcp_v4_do_rcv,
2592 .hash = tcp_v4_hash,
2593 .unhash = tcp_unhash,
2594 .get_port = tcp_v4_get_port,
2595 .enter_memory_pressure = tcp_enter_memory_pressure,
2596 .sockets_allocated = &tcp_sockets_allocated,
2597 .memory_allocated = &tcp_memory_allocated,
2598 .memory_pressure = &tcp_memory_pressure,
2599 .sysctl_mem = sysctl_tcp_mem,
2600 .sysctl_wmem = sysctl_tcp_wmem,
2601 .sysctl_rmem = sysctl_tcp_rmem,
2602 .max_header = MAX_TCP_HEADER,
2603 .obj_size = sizeof(struct tcp_sock),
2604	.rsk_prot = &tcp_request_sock_ops,
2605};
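/* tcp_prot is the struct proto instance that the generic socket layer calls
 * into for SOCK_STREAM/IPPROTO_TCP sockets; in this tree it is registered
 * (and its slab caches, including the request_sock cache named by rsk_prot,
 * are set up) from inet_init() in net/ipv4/af_inet.c via proto_register().
 */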
2606
2607
2608
2609void __init tcp_v4_init(struct net_proto_family *ops)
2610{
2611 int err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_TCP, &tcp_socket);
2612 if (err < 0)
2613 panic("Failed to create the TCP control socket.\n");
2614 tcp_socket->sk->sk_allocation = GFP_ATOMIC;
2615 inet_sk(tcp_socket->sk)->uc_ttl = -1;
2616
2617	/* Unhash it so that IP input processing does not even
2618	 * see it; we do not want this socket to receive incoming
2619	 * packets.
2620 */
2621 tcp_socket->sk->sk_prot->unhash(tcp_socket->sk);
2622}
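/* The kernel-internal tcp_socket created here never carries normal data; it
 * only lends its sock to ip_send_reply() so that RST segments and certain
 * stand-alone ACKs (e.g. on behalf of TIME_WAIT sockets) can be emitted for
 * packets that have no local socket.  That is also why it is unhashed and
 * why its allocations are forced to GFP_ATOMIC.
 */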
2623
2624EXPORT_SYMBOL(ipv4_specific);
2625EXPORT_SYMBOL(tcp_bind_hash);
2626EXPORT_SYMBOL(tcp_bucket_create);
2627EXPORT_SYMBOL(tcp_hashinfo);
2628EXPORT_SYMBOL(tcp_inherit_port);
2629EXPORT_SYMBOL(tcp_listen_wlock);
2630EXPORT_SYMBOL(tcp_port_rover);
2631EXPORT_SYMBOL(tcp_prot);
2632EXPORT_SYMBOL(tcp_put_port);
2633EXPORT_SYMBOL(tcp_unhash);
2634EXPORT_SYMBOL(tcp_v4_conn_request);
2635EXPORT_SYMBOL(tcp_v4_connect);
2636EXPORT_SYMBOL(tcp_v4_do_rcv);
2637EXPORT_SYMBOL(tcp_v4_rebuild_header);
2638EXPORT_SYMBOL(tcp_v4_remember_stamp);
2639EXPORT_SYMBOL(tcp_v4_send_check);
2640EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
2641
2642#ifdef CONFIG_PROC_FS
2643EXPORT_SYMBOL(tcp_proc_register);
2644EXPORT_SYMBOL(tcp_proc_unregister);
2645#endif
2646EXPORT_SYMBOL(sysctl_local_port_range);
2647EXPORT_SYMBOL(sysctl_tcp_low_latency);
2648EXPORT_SYMBOL(sysctl_tcp_tw_reuse);
2649