]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/netfilter/ip_conntrack_proto_tcp.c
[IP]: Introduce ip_hdrlen()
[net-next-2.6.git] / net / ipv4 / netfilter / ip_conntrack_proto_tcp.c
CommitLineData
1da177e4
LT
1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>:
9 * - Real stateful connection tracking
10 * - Modified state transitions table
11 * - Window scaling support added
12 * - SACK support added
13 *
14 * Willy Tarreau:
15 * - State table bugfixes
16 * - More robust state changes
17 * - Tuning timer parameters
18 *
19 * version 2.2
20 */
21
1da177e4 22#include <linux/types.h>
1da177e4
LT
23#include <linux/timer.h>
24#include <linux/netfilter.h>
25#include <linux/module.h>
26#include <linux/in.h>
27#include <linux/ip.h>
28#include <linux/tcp.h>
29#include <linux/spinlock.h>
30
31#include <net/tcp.h>
32
1da177e4
LT
33#include <linux/netfilter_ipv4.h>
34#include <linux/netfilter_ipv4/ip_conntrack.h>
35#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
1da177e4
LT
36
37#if 0
38#define DEBUGP printk
39#define DEBUGP_VARS
40#else
41#define DEBUGP(format, args...)
42#endif
43
44/* Protects conntrack->proto.tcp */
e45b1be8 45static DEFINE_RWLOCK(tcp_lock);
1da177e4 46
e905a9ed
YH
47/* "Be conservative in what you do,
48 be liberal in what you accept from others."
1da177e4 49 If it's non-zero, we mark only out of window RST segments as INVALID. */
94aec08e 50int ip_ct_tcp_be_liberal __read_mostly = 0;
1da177e4 51
a09113c2 52/* If it is set to zero, we disable picking up already established
1da177e4 53 connections. */
a09113c2 54int ip_ct_tcp_loose __read_mostly = 1;
1da177e4 55
e905a9ed
YH
56/* Max number of the retransmitted packets without receiving an (acceptable)
57 ACK from the destination. If this number is reached, a shorter timer
1da177e4 58 will be started. */
94aec08e 59int ip_ct_tcp_max_retrans __read_mostly = 3;
1da177e4
LT
60
61 /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
62 closely. They're more complex. --RR */
63
64static const char *tcp_conntrack_names[] = {
65 "NONE",
66 "SYN_SENT",
67 "SYN_RECV",
68 "ESTABLISHED",
69 "FIN_WAIT",
70 "CLOSE_WAIT",
71 "LAST_ACK",
72 "TIME_WAIT",
73 "CLOSE",
74 "LISTEN"
75};
e905a9ed 76
1da177e4
LT
77#define SECS * HZ
78#define MINS * 60 SECS
79#define HOURS * 60 MINS
80#define DAYS * 24 HOURS
81
94aec08e
BH
82unsigned int ip_ct_tcp_timeout_syn_sent __read_mostly = 2 MINS;
83unsigned int ip_ct_tcp_timeout_syn_recv __read_mostly = 60 SECS;
84unsigned int ip_ct_tcp_timeout_established __read_mostly = 5 DAYS;
85unsigned int ip_ct_tcp_timeout_fin_wait __read_mostly = 2 MINS;
86unsigned int ip_ct_tcp_timeout_close_wait __read_mostly = 60 SECS;
87unsigned int ip_ct_tcp_timeout_last_ack __read_mostly = 30 SECS;
88unsigned int ip_ct_tcp_timeout_time_wait __read_mostly = 2 MINS;
89unsigned int ip_ct_tcp_timeout_close __read_mostly = 10 SECS;
1da177e4
LT
90
91/* RFC1122 says the R2 limit should be at least 100 seconds.
e905a9ed 92 Linux uses 15 packets as limit, which corresponds
1da177e4 93 to ~13-30min depending on RTO. */
94aec08e 94unsigned int ip_ct_tcp_timeout_max_retrans __read_mostly = 5 MINS;
e905a9ed 95
babbdb1a 96static const unsigned int * tcp_timeouts[]
1da177e4
LT
97= { NULL, /* TCP_CONNTRACK_NONE */
98 &ip_ct_tcp_timeout_syn_sent, /* TCP_CONNTRACK_SYN_SENT, */
99 &ip_ct_tcp_timeout_syn_recv, /* TCP_CONNTRACK_SYN_RECV, */
100 &ip_ct_tcp_timeout_established, /* TCP_CONNTRACK_ESTABLISHED, */
101 &ip_ct_tcp_timeout_fin_wait, /* TCP_CONNTRACK_FIN_WAIT, */
102 &ip_ct_tcp_timeout_close_wait, /* TCP_CONNTRACK_CLOSE_WAIT, */
103 &ip_ct_tcp_timeout_last_ack, /* TCP_CONNTRACK_LAST_ACK, */
104 &ip_ct_tcp_timeout_time_wait, /* TCP_CONNTRACK_TIME_WAIT, */
105 &ip_ct_tcp_timeout_close, /* TCP_CONNTRACK_CLOSE, */
106 NULL, /* TCP_CONNTRACK_LISTEN */
107 };
e905a9ed 108
1da177e4
LT
/* Two-letter shorthands that keep the state transition table readable. */
#define sNO	TCP_CONNTRACK_NONE
#define sSS	TCP_CONNTRACK_SYN_SENT
#define sSR	TCP_CONNTRACK_SYN_RECV
#define sES	TCP_CONNTRACK_ESTABLISHED
#define sFW	TCP_CONNTRACK_FIN_WAIT
#define sCW	TCP_CONNTRACK_CLOSE_WAIT
#define sLA	TCP_CONNTRACK_LAST_ACK
#define sTW	TCP_CONNTRACK_TIME_WAIT
#define sCL	TCP_CONNTRACK_CLOSE
#define sLI	TCP_CONNTRACK_LISTEN
#define sIV	TCP_CONNTRACK_MAX	/* packet is INVALID */
#define sIG	TCP_CONNTRACK_IGNORE	/* packet is IGNORED */

/* What TCP flags are set from RST/SYN/FIN/ACK. */
enum tcp_bit_set {
	TCP_SYN_SET	= 0,
	TCP_SYNACK_SET	= 1,
	TCP_FIN_SET	= 2,
	TCP_ACK_SET	= 3,
	TCP_RST_SET	= 4,
	TCP_NONE_SET	= 5,
};
e905a9ed 131
1da177e4
LT
132/*
133 * The TCP state transition table needs a few words...
134 *
135 * We are the man in the middle. All the packets go through us
136 * but might get lost in transit to the destination.
e905a9ed 137 * It is assumed that the destinations can't receive segments
1da177e4
LT
138 * we haven't seen.
139 *
140 * The checked segment is in window, but our windows are *not*
141 * equivalent with the ones of the sender/receiver. We always
142 * try to guess the state of the current sender.
143 *
144 * The meaning of the states are:
145 *
146 * NONE: initial state
e905a9ed 147 * SYN_SENT: SYN-only packet seen
1da177e4
LT
148 * SYN_RECV: SYN-ACK packet seen
149 * ESTABLISHED: ACK packet seen
150 * FIN_WAIT: FIN packet seen
e905a9ed 151 * CLOSE_WAIT: ACK seen (after FIN)
1da177e4
LT
152 * LAST_ACK: FIN seen (after FIN)
153 * TIME_WAIT: last ACK seen
154 * CLOSE: closed connection
155 *
156 * LISTEN state is not used.
157 *
158 * Packets marked as IGNORED (sIG):
e905a9ed
YH
159 * if they may be either invalid or valid
160 * and the receiver may send back a connection
1da177e4
LT
161 * closing RST or a SYN/ACK.
162 *
163 * Packets marked as INVALID (sIV):
164 * if they are invalid
165 * or we do not support the request (simultaneous open)
166 */
9b5b5cff 167static const enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
1da177e4
LT
168 {
169/* ORIGINAL */
170/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
171/*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sIV },
172/*
173 * sNO -> sSS Initialize a new connection
174 * sSS -> sSS Retransmitted SYN
175 * sSR -> sIG Late retransmitted SYN?
176 * sES -> sIG Error: SYNs in window outside the SYN_SENT state
e905a9ed 177 * are errors. Receiver will reply with RST
1da177e4
LT
178 * and close the connection.
179 * Or we are not in sync and hold a dead connection.
180 * sFW -> sIG
181 * sCW -> sIG
182 * sLA -> sIG
183 * sTW -> sSS Reopened connection (RFC 1122).
184 * sCL -> sSS
185 */
186/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
187/*synack*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
188/*
189 * A SYN/ACK from the client is always invalid:
e905a9ed 190 * - either it tries to set up a simultaneous open, which is
1da177e4
LT
191 * not supported;
192 * - or the firewall has just been inserted between the two hosts
e905a9ed 193 * during the session set-up. The SYN will be retransmitted
1da177e4
LT
194 * by the true client (or it'll time out).
195 */
196/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
197/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
198/*
199 * sNO -> sIV Too late and no reason to do anything...
200 * sSS -> sIV Client migth not send FIN in this state:
201 * we enforce waiting for a SYN/ACK reply first.
202 * sSR -> sFW Close started.
e905a9ed 203 * sES -> sFW
1da177e4 204 * sFW -> sLA FIN seen in both directions, waiting for
e905a9ed 205 * the last ACK.
1da177e4
LT
206 * Migth be a retransmitted FIN as well...
207 * sCW -> sLA
208 * sLA -> sLA Retransmitted FIN. Remain in the same state.
209 * sTW -> sTW
210 * sCL -> sCL
211 */
212/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
213/*ack*/ { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
214/*
215 * sNO -> sES Assumed.
216 * sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet.
217 * sSR -> sES Established state is reached.
218 * sES -> sES :-)
219 * sFW -> sCW Normal close request answered by ACK.
220 * sCW -> sCW
221 * sLA -> sTW Last ACK detected.
222 * sTW -> sTW Retransmitted last ACK. Remain in the same state.
223 * sCL -> sCL
224 */
225/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
226/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
227/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
228 },
229 {
230/* REPLY */
231/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
232/*syn*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
233/*
234 * sNO -> sIV Never reached.
235 * sSS -> sIV Simultaneous open, not supported
236 * sSR -> sIV Simultaneous open, not supported.
237 * sES -> sIV Server may not initiate a connection.
238 * sFW -> sIV
239 * sCW -> sIV
240 * sLA -> sIV
241 * sTW -> sIV Reopened connection, but server may not do it.
242 * sCL -> sIV
243 */
244/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
245/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIV },
246/*
247 * sSS -> sSR Standard open.
248 * sSR -> sSR Retransmitted SYN/ACK.
249 * sES -> sIG Late retransmitted SYN/ACK?
250 * sFW -> sIG Might be SYN/ACK answering ignored SYN
251 * sCW -> sIG
252 * sLA -> sIG
253 * sTW -> sIG
254 * sCL -> sIG
255 */
256/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
257/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
258/*
259 * sSS -> sIV Server might not send FIN in this state.
260 * sSR -> sFW Close started.
261 * sES -> sFW
262 * sFW -> sLA FIN seen in both directions.
263 * sCW -> sLA
264 * sLA -> sLA Retransmitted FIN.
265 * sTW -> sTW
266 * sCL -> sCL
267 */
268/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
73f30602 269/*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV },
1da177e4 270/*
73f30602 271 * sSS -> sIG Might be a half-open connection.
1da177e4
LT
272 * sSR -> sSR Might answer late resent SYN.
273 * sES -> sES :-)
274 * sFW -> sCW Normal close request answered by ACK.
275 * sCW -> sCW
276 * sLA -> sTW Last ACK detected.
277 * sTW -> sTW Retransmitted last ACK.
278 * sCL -> sCL
279 */
280/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
281/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
282/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
e905a9ed 283 }
1da177e4
LT
284};
285
286static int tcp_pkt_to_tuple(const struct sk_buff *skb,
287 unsigned int dataoff,
288 struct ip_conntrack_tuple *tuple)
289{
290 struct tcphdr _hdr, *hp;
291
292 /* Actually only need first 8 bytes. */
293 hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
294 if (hp == NULL)
295 return 0;
296
297 tuple->src.u.tcp.port = hp->source;
298 tuple->dst.u.tcp.port = hp->dest;
299
300 return 1;
301}
302
303static int tcp_invert_tuple(struct ip_conntrack_tuple *tuple,
304 const struct ip_conntrack_tuple *orig)
305{
306 tuple->src.u.tcp.port = orig->dst.u.tcp.port;
307 tuple->dst.u.tcp.port = orig->src.u.tcp.port;
308 return 1;
309}
310
311/* Print out the per-protocol part of the tuple. */
312static int tcp_print_tuple(struct seq_file *s,
313 const struct ip_conntrack_tuple *tuple)
314{
315 return seq_printf(s, "sport=%hu dport=%hu ",
316 ntohs(tuple->src.u.tcp.port),
317 ntohs(tuple->dst.u.tcp.port));
318}
319
320/* Print out the private part of the conntrack. */
321static int tcp_print_conntrack(struct seq_file *s,
322 const struct ip_conntrack *conntrack)
323{
324 enum tcp_conntrack state;
325
e45b1be8 326 read_lock_bh(&tcp_lock);
1da177e4 327 state = conntrack->proto.tcp.state;
e45b1be8 328 read_unlock_bh(&tcp_lock);
1da177e4
LT
329
330 return seq_printf(s, "%s ", tcp_conntrack_names[state]);
331}
332
080774a2
HW
333#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
334 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
335static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa,
336 const struct ip_conntrack *ct)
337{
266c8543 338 struct nfattr *nest_parms;
e905a9ed 339
080774a2 340 read_lock_bh(&tcp_lock);
266c8543 341 nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP);
080774a2
HW
342 NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t),
343 &ct->proto.tcp.state);
344 read_unlock_bh(&tcp_lock);
345
e1c73b78
PNA
346 NFA_NEST_END(skb, nest_parms);
347
080774a2
HW
348 return 0;
349
350nfattr_failure:
e4466399 351 read_unlock_bh(&tcp_lock);
080774a2
HW
352 return -1;
353}
33923153 354
56558208
PNA
355static const size_t cta_min_tcp[CTA_PROTOINFO_TCP_MAX] = {
356 [CTA_PROTOINFO_TCP_STATE-1] = sizeof(u_int8_t),
357};
358
33923153
PNA
359static int nfattr_to_tcp(struct nfattr *cda[], struct ip_conntrack *ct)
360{
361 struct nfattr *attr = cda[CTA_PROTOINFO_TCP-1];
362 struct nfattr *tb[CTA_PROTOINFO_TCP_MAX];
363
51df784e
PNA
364 /* updates could not contain anything about the private
365 * protocol info, in that case skip the parsing */
366 if (!attr)
367 return 0;
368
e905a9ed 369 nfattr_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, attr);
33923153 370
56558208
PNA
371 if (nfattr_bad_size(tb, CTA_PROTOINFO_TCP_MAX, cta_min_tcp))
372 return -EINVAL;
373
33923153
PNA
374 if (!tb[CTA_PROTOINFO_TCP_STATE-1])
375 return -EINVAL;
376
377 write_lock_bh(&tcp_lock);
e905a9ed 378 ct->proto.tcp.state =
33923153
PNA
379 *(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_STATE-1]);
380 write_unlock_bh(&tcp_lock);
381
382 return 0;
33923153 383}
080774a2
HW
384#endif
385
1da177e4
LT
386static unsigned int get_conntrack_index(const struct tcphdr *tcph)
387{
388 if (tcph->rst) return TCP_RST_SET;
389 else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
390 else if (tcph->fin) return TCP_FIN_SET;
391 else if (tcph->ack) return TCP_ACK_SET;
392 else return TCP_NONE_SET;
393}
394
395/* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
396 in IP Filter' by Guido van Rooij.
e905a9ed 397
1da177e4
LT
398 http://www.nluug.nl/events/sane2000/papers.html
399 http://www.iae.nl/users/guido/papers/tcp_filtering.ps.gz
e905a9ed 400
1da177e4
LT
401 The boundaries and the conditions are changed according to RFC793:
402 the packet must intersect the window (i.e. segments may be
403 after the right or before the left edge) and thus receivers may ACK
404 segments after the right edge of the window.
405
e905a9ed 406 td_maxend = max(sack + max(win,1)) seen in reply packets
1da177e4
LT
407 td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
408 td_maxwin += seq + len - sender.td_maxend
409 if seq + len > sender.td_maxend
410 td_end = max(seq + len) seen in sent packets
e905a9ed 411
1da177e4
LT
412 I. Upper bound for valid data: seq <= sender.td_maxend
413 II. Lower bound for valid data: seq + len >= sender.td_end - receiver.td_maxwin
414 III. Upper bound for valid ack: sack <= receiver.td_end
415 IV. Lower bound for valid ack: ack >= receiver.td_end - MAXACKWINDOW
e905a9ed 416
1da177e4 417 where sack is the highest right edge of sack block found in the packet.
e905a9ed
YH
418
419 The upper bound limit for a valid ack is not ignored -
420 we don't have to deal with fragments.
1da177e4
LT
421*/
422
423static inline __u32 segment_seq_plus_len(__u32 seq,
424 size_t len,
425 struct iphdr *iph,
426 struct tcphdr *tcph)
427{
428 return (seq + len - (iph->ihl + tcph->doff)*4
429 + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
430}
e905a9ed 431
1da177e4
LT
/* Fixme: what about big packets? */
#define MAXACKWINCONST			66000
/*
 * The larger of (sender)->td_maxwin and MAXACKWINCONST.
 * Note that the argument is evaluated more than once.
 */
#define MAXACKWINDOW(sender)						\
	((sender)->td_maxwin <= MAXACKWINCONST ? MAXACKWINCONST		\
					       : (sender)->td_maxwin)
e905a9ed 437
1da177e4
LT
438/*
439 * Simplified tcp_parse_options routine from tcp_input.c
440 */
441static void tcp_options(const struct sk_buff *skb,
442 struct iphdr *iph,
e905a9ed 443 struct tcphdr *tcph,
1da177e4
LT
444 struct ip_ct_tcp_state *state)
445{
446 unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
447 unsigned char *ptr;
448 int length = (tcph->doff*4) - sizeof(struct tcphdr);
e905a9ed 449
1da177e4
LT
450 if (!length)
451 return;
452
453 ptr = skb_header_pointer(skb,
454 (iph->ihl * 4) + sizeof(struct tcphdr),
455 length, buff);
456 BUG_ON(ptr == NULL);
457
e905a9ed 458 state->td_scale =
1da177e4 459 state->flags = 0;
e905a9ed 460
1da177e4
LT
461 while (length > 0) {
462 int opcode=*ptr++;
463 int opsize;
e905a9ed 464
1da177e4
LT
465 switch (opcode) {
466 case TCPOPT_EOL:
467 return;
468 case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
469 length--;
470 continue;
471 default:
472 opsize=*ptr++;
473 if (opsize < 2) /* "silly options" */
474 return;
475 if (opsize > length)
476 break; /* don't parse partial options */
477
e905a9ed 478 if (opcode == TCPOPT_SACK_PERM
1da177e4
LT
479 && opsize == TCPOLEN_SACK_PERM)
480 state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
481 else if (opcode == TCPOPT_WINDOW
482 && opsize == TCPOLEN_WINDOW) {
483 state->td_scale = *(u_int8_t *)ptr;
e905a9ed 484
1da177e4
LT
485 if (state->td_scale > 14) {
486 /* See RFC1323 */
487 state->td_scale = 14;
488 }
489 state->flags |=
490 IP_CT_TCP_FLAG_WINDOW_SCALE;
491 }
492 ptr += opsize - 2;
493 length -= opsize;
494 }
495 }
496}
497
498static void tcp_sack(const struct sk_buff *skb,
499 struct iphdr *iph,
500 struct tcphdr *tcph,
501 __u32 *sack)
502{
503 unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
504 unsigned char *ptr;
505 int length = (tcph->doff*4) - sizeof(struct tcphdr);
506 __u32 tmp;
507
508 if (!length)
509 return;
510
511 ptr = skb_header_pointer(skb,
512 (iph->ihl * 4) + sizeof(struct tcphdr),
513 length, buff);
514 BUG_ON(ptr == NULL);
515
516 /* Fast path for timestamp-only option */
517 if (length == TCPOLEN_TSTAMP_ALIGNED*4
4412ec49
YH
518 && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
519 | (TCPOPT_NOP << 16)
520 | (TCPOPT_TIMESTAMP << 8)
521 | TCPOLEN_TIMESTAMP))
1da177e4 522 return;
e905a9ed 523
1da177e4
LT
524 while (length > 0) {
525 int opcode=*ptr++;
526 int opsize, i;
e905a9ed 527
1da177e4
LT
528 switch (opcode) {
529 case TCPOPT_EOL:
530 return;
531 case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
532 length--;
533 continue;
534 default:
535 opsize=*ptr++;
536 if (opsize < 2) /* "silly options" */
537 return;
538 if (opsize > length)
539 break; /* don't parse partial options */
540
e905a9ed
YH
541 if (opcode == TCPOPT_SACK
542 && opsize >= (TCPOLEN_SACK_BASE
543 + TCPOLEN_SACK_PERBLOCK)
544 && !((opsize - TCPOLEN_SACK_BASE)
545 % TCPOLEN_SACK_PERBLOCK)) {
546 for (i = 0;
547 i < (opsize - TCPOLEN_SACK_BASE);
548 i += TCPOLEN_SACK_PERBLOCK) {
cdcb71bf 549 tmp = ntohl(*((__be32 *)(ptr+i)+1));
e905a9ed 550
1da177e4
LT
551 if (after(tmp, *sack))
552 *sack = tmp;
553 }
554 return;
555 }
556 ptr += opsize - 2;
557 length -= opsize;
558 }
559 }
560}
561
e905a9ed
YH
562static int tcp_in_window(struct ip_ct_tcp *state,
563 enum ip_conntrack_dir dir,
564 unsigned int index,
565 const struct sk_buff *skb,
566 struct iphdr *iph,
567 struct tcphdr *tcph)
1da177e4
LT
568{
569 struct ip_ct_tcp_state *sender = &state->seen[dir];
570 struct ip_ct_tcp_state *receiver = &state->seen[!dir];
571 __u32 seq, ack, sack, end, win, swin;
572 int res;
e905a9ed 573
1da177e4
LT
574 /*
575 * Get the required data from the packet.
576 */
577 seq = ntohl(tcph->seq);
578 ack = sack = ntohl(tcph->ack_seq);
579 win = ntohs(tcph->window);
580 end = segment_seq_plus_len(seq, skb->len, iph, tcph);
e905a9ed 581
1da177e4
LT
582 if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
583 tcp_sack(skb, iph, tcph, &sack);
e905a9ed 584
1da177e4
LT
585 DEBUGP("tcp_in_window: START\n");
586 DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
587 "seq=%u ack=%u sack=%u win=%u end=%u\n",
e905a9ed 588 NIPQUAD(iph->saddr), ntohs(tcph->source),
1da177e4
LT
589 NIPQUAD(iph->daddr), ntohs(tcph->dest),
590 seq, ack, sack, win, end);
591 DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
592 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
593 sender->td_end, sender->td_maxend, sender->td_maxwin,
e905a9ed
YH
594 sender->td_scale,
595 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
1da177e4 596 receiver->td_scale);
e905a9ed 597
1da177e4
LT
598 if (sender->td_end == 0) {
599 /*
600 * Initialize sender data.
601 */
602 if (tcph->syn && tcph->ack) {
603 /*
604 * Outgoing SYN-ACK in reply to a SYN.
605 */
e905a9ed 606 sender->td_end =
1da177e4
LT
607 sender->td_maxend = end;
608 sender->td_maxwin = (win == 0 ? 1 : win);
609
610 tcp_options(skb, iph, tcph, sender);
e905a9ed 611 /*
1da177e4
LT
612 * RFC 1323:
613 * Both sides must send the Window Scale option
614 * to enable window scaling in either direction.
615 */
616 if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
617 && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
e905a9ed 618 sender->td_scale =
1da177e4
LT
619 receiver->td_scale = 0;
620 } else {
621 /*
622 * We are in the middle of a connection,
623 * its history is lost for us.
624 * Let's try to use the data from the packet.
e905a9ed 625 */
1da177e4
LT
626 sender->td_end = end;
627 sender->td_maxwin = (win == 0 ? 1 : win);
628 sender->td_maxend = end + sender->td_maxwin;
629 }
630 } else if (((state->state == TCP_CONNTRACK_SYN_SENT
631 && dir == IP_CT_DIR_ORIGINAL)
632 || (state->state == TCP_CONNTRACK_SYN_RECV
e905a9ed 633 && dir == IP_CT_DIR_REPLY))
1da177e4
LT
634 && after(end, sender->td_end)) {
635 /*
636 * RFC 793: "if a TCP is reinitialized ... then it need
e905a9ed 637 * not wait at all; it must only be sure to use sequence
1da177e4
LT
638 * numbers larger than those recently used."
639 */
640 sender->td_end =
641 sender->td_maxend = end;
642 sender->td_maxwin = (win == 0 ? 1 : win);
643
644 tcp_options(skb, iph, tcph, sender);
645 }
e905a9ed 646
1da177e4
LT
647 if (!(tcph->ack)) {
648 /*
649 * If there is no ACK, just pretend it was set and OK.
650 */
651 ack = sack = receiver->td_end;
e905a9ed
YH
652 } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
653 (TCP_FLAG_ACK|TCP_FLAG_RST))
1da177e4
LT
654 && (ack == 0)) {
655 /*
656 * Broken TCP stacks, that set ACK in RST packets as well
657 * with zero ack value.
658 */
659 ack = sack = receiver->td_end;
660 }
661
662 if (seq == end
e905a9ed
YH
663 && (!tcph->rst
664 || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)))
1da177e4
LT
665 /*
666 * Packets contains no data: we assume it is valid
667 * and check the ack value only.
668 * However RST segments are always validated by their
669 * SEQ number, except when seq == 0 (reset sent answering
670 * SYN.
671 */
672 seq = end = sender->td_end;
e905a9ed 673
1da177e4
LT
674 DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
675 "seq=%u ack=%u sack =%u win=%u end=%u\n",
676 NIPQUAD(iph->saddr), ntohs(tcph->source),
677 NIPQUAD(iph->daddr), ntohs(tcph->dest),
678 seq, ack, sack, win, end);
679 DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
680 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
681 sender->td_end, sender->td_maxend, sender->td_maxwin,
e905a9ed 682 sender->td_scale,
1da177e4
LT
683 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
684 receiver->td_scale);
e905a9ed 685
1da177e4
LT
686 DEBUGP("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
687 before(seq, sender->td_maxend + 1),
e905a9ed
YH
688 after(end, sender->td_end - receiver->td_maxwin - 1),
689 before(sack, receiver->td_end + 1),
690 after(ack, receiver->td_end - MAXACKWINDOW(sender)));
691
a09113c2
PM
692 if (before(seq, sender->td_maxend + 1) &&
693 after(end, sender->td_end - receiver->td_maxwin - 1) &&
694 before(sack, receiver->td_end + 1) &&
695 after(ack, receiver->td_end - MAXACKWINDOW(sender))) {
e905a9ed 696 /*
1da177e4
LT
697 * Take into account window scaling (RFC 1323).
698 */
699 if (!tcph->syn)
700 win <<= sender->td_scale;
e905a9ed 701
1da177e4
LT
702 /*
703 * Update sender data.
704 */
705 swin = win + (sack - ack);
706 if (sender->td_maxwin < swin)
707 sender->td_maxwin = swin;
708 if (after(end, sender->td_end))
709 sender->td_end = end;
710 /*
711 * Update receiver data.
712 */
713 if (after(end, sender->td_maxend))
714 receiver->td_maxwin += end - sender->td_maxend;
715 if (after(sack + win, receiver->td_maxend - 1)) {
716 receiver->td_maxend = sack + win;
717 if (win == 0)
718 receiver->td_maxend++;
719 }
720
e905a9ed 721 /*
1da177e4
LT
722 * Check retransmissions.
723 */
724 if (index == TCP_ACK_SET) {
725 if (state->last_dir == dir
726 && state->last_seq == seq
727 && state->last_ack == ack
c1fe3ca5
GH
728 && state->last_end == end
729 && state->last_win == win)
1da177e4
LT
730 state->retrans++;
731 else {
732 state->last_dir = dir;
733 state->last_seq = seq;
734 state->last_ack = ack;
735 state->last_end = end;
c1fe3ca5 736 state->last_win = win;
1da177e4
LT
737 state->retrans = 0;
738 }
739 }
1da177e4
LT
740 res = 1;
741 } else {
a09113c2
PM
742 res = 0;
743 if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
744 ip_ct_tcp_be_liberal)
745 res = 1;
746 if (!res && LOG_INVALID(IPPROTO_TCP))
608c8e4f 747 nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
1da177e4
LT
748 "ip_ct_tcp: %s ",
749 before(seq, sender->td_maxend + 1) ?
750 after(end, sender->td_end - receiver->td_maxwin - 1) ?
751 before(sack, receiver->td_end + 1) ?
752 after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG"
753 : "ACK is under the lower bound (possible overly delayed ACK)"
754 : "ACK is over the upper bound (ACKed data not seen yet)"
755 : "SEQ is under the lower bound (already ACKed data retransmitted)"
756 : "SEQ is over the upper bound (over the window of the receiver)");
e905a9ed
YH
757 }
758
1da177e4
LT
759 DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u "
760 "receiver end=%u maxend=%u maxwin=%u\n",
e905a9ed 761 res, sender->td_end, sender->td_maxend, sender->td_maxwin,
1da177e4
LT
762 receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
763
764 return res;
765}
766
#ifdef CONFIG_IP_NF_NAT_NEEDED
/*
 * Update sender->td_end after NAT successfully mangled the packet.
 *
 * The end of the segment is recomputed from the packet; under tcp_lock
 * it replaces seen[dir].td_end when it lies after the stored value, and
 * it is always stored as last_end.
 */
void ip_conntrack_tcp_update(struct sk_buff *skb,
			     struct ip_conntrack *conntrack,
			     enum ip_conntrack_dir dir)
{
	struct iphdr *iph = skb->nh.iph;
	struct tcphdr *tcph = (void *)skb->nh.iph + ip_hdrlen(skb);
	__u32 seg_end;
#ifdef DEBUGP_VARS
	/* Only needed to feed the DEBUGP() call at the end. */
	struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[dir];
	struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[!dir];
#endif

	seg_end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, iph, tcph);

	write_lock_bh(&tcp_lock);
	/*
	 * We have to worry for the ack in the reply packet only...
	 */
	if (after(seg_end, conntrack->proto.tcp.seen[dir].td_end))
		conntrack->proto.tcp.seen[dir].td_end = seg_end;
	conntrack->proto.tcp.last_end = seg_end;
	write_unlock_bh(&tcp_lock);

	DEBUGP("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i "
	       "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
		sender->td_end, sender->td_maxend, sender->td_maxwin,
		sender->td_scale,
		receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
		receiver->td_scale);
}

#endif
800
801#define TH_FIN 0x01
802#define TH_SYN 0x02
803#define TH_RST 0x04
804#define TH_PUSH 0x08
805#define TH_ACK 0x10
806#define TH_URG 0x20
807#define TH_ECE 0x40
808#define TH_CWR 0x80
809
810/* table of valid flag combinations - ECE and CWR are always valid */
9b5b5cff 811static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG) + 1] =
1da177e4
LT
812{
813 [TH_SYN] = 1,
a2d7222f 814 [TH_SYN|TH_PUSH] = 1,
d3ab4298
PM
815 [TH_SYN|TH_URG] = 1,
816 [TH_SYN|TH_PUSH|TH_URG] = 1,
817 [TH_SYN|TH_ACK] = 1,
3b2d59d1 818 [TH_SYN|TH_ACK|TH_PUSH] = 1,
1da177e4
LT
819 [TH_RST] = 1,
820 [TH_RST|TH_ACK] = 1,
821 [TH_RST|TH_ACK|TH_PUSH] = 1,
822 [TH_FIN|TH_ACK] = 1,
823 [TH_ACK] = 1,
824 [TH_ACK|TH_PUSH] = 1,
825 [TH_ACK|TH_URG] = 1,
826 [TH_ACK|TH_URG|TH_PUSH] = 1,
827 [TH_FIN|TH_ACK|TH_PUSH] = 1,
828 [TH_FIN|TH_ACK|TH_URG] = 1,
829 [TH_FIN|TH_ACK|TH_URG|TH_PUSH] = 1,
830};
831
832/* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */
833static int tcp_error(struct sk_buff *skb,
834 enum ip_conntrack_info *ctinfo,
835 unsigned int hooknum)
836{
837 struct iphdr *iph = skb->nh.iph;
838 struct tcphdr _tcph, *th;
839 unsigned int tcplen = skb->len - iph->ihl * 4;
840 u_int8_t tcpflags;
841
842 /* Smaller that minimal TCP header? */
843 th = skb_header_pointer(skb, iph->ihl * 4,
844 sizeof(_tcph), &_tcph);
845 if (th == NULL) {
846 if (LOG_INVALID(IPPROTO_TCP))
608c8e4f 847 nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
1da177e4
LT
848 "ip_ct_tcp: short packet ");
849 return -NF_ACCEPT;
e905a9ed
YH
850 }
851
1da177e4
LT
852 /* Not whole TCP header or malformed packet */
853 if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
854 if (LOG_INVALID(IPPROTO_TCP))
608c8e4f 855 nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
1da177e4
LT
856 "ip_ct_tcp: truncated/malformed packet ");
857 return -NF_ACCEPT;
858 }
e905a9ed 859
1da177e4
LT
860 /* Checksum invalid? Ignore.
861 * We skip checking packets on the outgoing path
84fa7933 862 * because it is assumed to be correct.
1da177e4
LT
863 */
864 /* FIXME: Source route IP option packets --RR */
39a27a35 865 if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING &&
96f6bf82 866 nf_ip_checksum(skb, hooknum, iph->ihl * 4, IPPROTO_TCP)) {
1da177e4 867 if (LOG_INVALID(IPPROTO_TCP))
608c8e4f 868 nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
1da177e4
LT
869 "ip_ct_tcp: bad TCP checksum ");
870 return -NF_ACCEPT;
871 }
872
873 /* Check TCP flags. */
874 tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR));
875 if (!tcp_valid_flags[tcpflags]) {
876 if (LOG_INVALID(IPPROTO_TCP))
608c8e4f 877 nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
1da177e4
LT
878 "ip_ct_tcp: invalid TCP flag combination ");
879 return -NF_ACCEPT;
880 }
881
882 return NF_ACCEPT;
883}
884
885/* Returns verdict for packet, or -1 for invalid. */
886static int tcp_packet(struct ip_conntrack *conntrack,
887 const struct sk_buff *skb,
888 enum ip_conntrack_info ctinfo)
889{
890 enum tcp_conntrack new_state, old_state;
891 enum ip_conntrack_dir dir;
892 struct iphdr *iph = skb->nh.iph;
893 struct tcphdr *th, _tcph;
894 unsigned long timeout;
895 unsigned int index;
e905a9ed 896
1da177e4
LT
897 th = skb_header_pointer(skb, iph->ihl * 4,
898 sizeof(_tcph), &_tcph);
899 BUG_ON(th == NULL);
e905a9ed 900
e45b1be8 901 write_lock_bh(&tcp_lock);
1da177e4
LT
902 old_state = conntrack->proto.tcp.state;
903 dir = CTINFO2DIR(ctinfo);
904 index = get_conntrack_index(th);
905 new_state = tcp_conntracks[dir][index][old_state];
906
907 switch (new_state) {
908 case TCP_CONNTRACK_IGNORE:
73f30602 909 /* Ignored packets:
e905a9ed 910 *
73f30602
JK
911 * a) SYN in ORIGINAL
912 * b) SYN/ACK in REPLY
913 * c) ACK in reply direction after initial SYN in original.
914 */
1da177e4
LT
915 if (index == TCP_SYNACK_SET
916 && conntrack->proto.tcp.last_index == TCP_SYN_SET
917 && conntrack->proto.tcp.last_dir != dir
918 && ntohl(th->ack_seq) ==
e905a9ed
YH
919 conntrack->proto.tcp.last_end) {
920 /* This SYN/ACK acknowledges a SYN that we earlier
1da177e4
LT
921 * ignored as invalid. This means that the client and
922 * the server are both in sync, while the firewall is
923 * not. We kill this session and block the SYN/ACK so
e905a9ed 924 * that the client cannot but retransmit its SYN and
1da177e4
LT
925 * thus initiate a clean new session.
926 */
e905a9ed 927 write_unlock_bh(&tcp_lock);
1da177e4 928 if (LOG_INVALID(IPPROTO_TCP))
608c8e4f
HW
929 nf_log_packet(PF_INET, 0, skb, NULL, NULL,
930 NULL, "ip_ct_tcp: "
931 "killing out of sync session ");
e905a9ed
YH
932 if (del_timer(&conntrack->timeout))
933 conntrack->timeout.function((unsigned long)
934 conntrack);
935 return -NF_DROP;
1da177e4
LT
936 }
937 conntrack->proto.tcp.last_index = index;
938 conntrack->proto.tcp.last_dir = dir;
939 conntrack->proto.tcp.last_seq = ntohl(th->seq);
e905a9ed 940 conntrack->proto.tcp.last_end =
1da177e4 941 segment_seq_plus_len(ntohl(th->seq), skb->len, iph, th);
e905a9ed 942
e45b1be8 943 write_unlock_bh(&tcp_lock);
1da177e4 944 if (LOG_INVALID(IPPROTO_TCP))
608c8e4f 945 nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
1da177e4
LT
946 "ip_ct_tcp: invalid packet ignored ");
947 return NF_ACCEPT;
948 case TCP_CONNTRACK_MAX:
949 /* Invalid packet */
950 DEBUGP("ip_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
951 dir, get_conntrack_index(th),
952 old_state);
e45b1be8 953 write_unlock_bh(&tcp_lock);
1da177e4 954 if (LOG_INVALID(IPPROTO_TCP))
608c8e4f 955 nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
1da177e4
LT
956 "ip_ct_tcp: invalid state ");
957 return -NF_ACCEPT;
958 case TCP_CONNTRACK_SYN_SENT:
959 if (old_state < TCP_CONNTRACK_TIME_WAIT)
960 break;
961 if ((conntrack->proto.tcp.seen[dir].flags &
e905a9ed 962 IP_CT_TCP_FLAG_CLOSE_INIT)
1da177e4 963 || after(ntohl(th->seq),
e905a9ed
YH
964 conntrack->proto.tcp.seen[dir].td_end)) {
965 /* Attempt to reopen a closed connection.
966 * Delete this connection and look up again. */
967 write_unlock_bh(&tcp_lock);
968 if (del_timer(&conntrack->timeout))
969 conntrack->timeout.function((unsigned long)
970 conntrack);
971 return -NF_REPEAT;
1da177e4 972 } else {
e45b1be8 973 write_unlock_bh(&tcp_lock);
1da177e4
LT
974 if (LOG_INVALID(IPPROTO_TCP))
975 nf_log_packet(PF_INET, 0, skb, NULL, NULL,
608c8e4f 976 NULL, "ip_ct_tcp: invalid SYN");
1da177e4
LT
977 return -NF_ACCEPT;
978 }
979 case TCP_CONNTRACK_CLOSE:
980 if (index == TCP_RST_SET
73f30602 981 && ((test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)
e905a9ed
YH
982 && conntrack->proto.tcp.last_index == TCP_SYN_SET)
983 || (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
984 && conntrack->proto.tcp.last_index == TCP_ACK_SET))
1da177e4 985 && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) {
93b1fae4 986 /* RST sent to invalid SYN or ACK we had let through
73f30602
JK
987 * at a) and c) above:
988 *
989 * a) SYN was in window then
990 * c) we hold a half-open connection.
991 *
992 * Delete our connection entry.
1da177e4 993 * We skip window checking, because packet might ACK
73f30602 994 * segments we ignored. */
1da177e4
LT
995 goto in_window;
996 }
93b1fae4 997 /* Just fall through */
1da177e4
LT
998 default:
999 /* Keep compilers happy. */
1000 break;
1001 }
1002
e905a9ed 1003 if (!tcp_in_window(&conntrack->proto.tcp, dir, index,
1da177e4 1004 skb, iph, th)) {
e45b1be8 1005 write_unlock_bh(&tcp_lock);
1da177e4
LT
1006 return -NF_ACCEPT;
1007 }
1008 in_window:
e905a9ed 1009 /* From now on we have got in-window packets */
1da177e4
LT
1010 conntrack->proto.tcp.last_index = index;
1011
1012 DEBUGP("tcp_conntracks: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
1013 "syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
1014 NIPQUAD(iph->saddr), ntohs(th->source),
1015 NIPQUAD(iph->daddr), ntohs(th->dest),
1016 (th->syn ? 1 : 0), (th->ack ? 1 : 0),
1017 (th->fin ? 1 : 0), (th->rst ? 1 : 0),
1018 old_state, new_state);
1019
1020 conntrack->proto.tcp.state = new_state;
e905a9ed 1021 if (old_state != new_state
1da177e4 1022 && (new_state == TCP_CONNTRACK_FIN_WAIT
e905a9ed 1023 || new_state == TCP_CONNTRACK_CLOSE))
1da177e4
LT
1024 conntrack->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
1025 timeout = conntrack->proto.tcp.retrans >= ip_ct_tcp_max_retrans
1026 && *tcp_timeouts[new_state] > ip_ct_tcp_timeout_max_retrans
1027 ? ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state];
e45b1be8 1028 write_unlock_bh(&tcp_lock);
1da177e4 1029
ac3247ba
HW
1030 ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
1031 if (new_state != old_state)
1032 ip_conntrack_event_cache(IPCT_PROTOINFO, skb);
1033
1da177e4
LT
1034 if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
1035 /* If only reply is a RST, we can consider ourselves not to
1036 have an established connection: this is a fairly common
1037 problem case, so we can delete the conntrack
1038 immediately. --RR */
1039 if (th->rst) {
1040 if (del_timer(&conntrack->timeout))
1041 conntrack->timeout.function((unsigned long)
1042 conntrack);
1043 return NF_ACCEPT;
1044 }
1045 } else if (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
1046 && (old_state == TCP_CONNTRACK_SYN_RECV
1047 || old_state == TCP_CONNTRACK_ESTABLISHED)
1048 && new_state == TCP_CONNTRACK_ESTABLISHED) {
e905a9ed
YH
1049 /* Set ASSURED if we see see valid ack in ESTABLISHED
1050 after SYN_RECV or a valid answer for a picked up
1da177e4 1051 connection. */
8ddec746
HW
1052 set_bit(IPS_ASSURED_BIT, &conntrack->status);
1053 ip_conntrack_event_cache(IPCT_STATUS, skb);
1da177e4
LT
1054 }
1055 ip_ct_refresh_acct(conntrack, ctinfo, skb, timeout);
1056
1057 return NF_ACCEPT;
1058}
e905a9ed 1059
1da177e4
LT
1060/* Called when a new connection for this protocol found. */
1061static int tcp_new(struct ip_conntrack *conntrack,
1062 const struct sk_buff *skb)
1063{
1064 enum tcp_conntrack new_state;
1065 struct iphdr *iph = skb->nh.iph;
1066 struct tcphdr *th, _tcph;
1067#ifdef DEBUGP_VARS
1068 struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[0];
1069 struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[1];
1070#endif
1071
1072 th = skb_header_pointer(skb, iph->ihl * 4,
1073 sizeof(_tcph), &_tcph);
1074 BUG_ON(th == NULL);
e905a9ed 1075
1da177e4
LT
1076 /* Don't need lock here: this conntrack not in circulation yet */
1077 new_state
1078 = tcp_conntracks[0][get_conntrack_index(th)]
1079 [TCP_CONNTRACK_NONE];
1080
1081 /* Invalid: delete conntrack */
1082 if (new_state >= TCP_CONNTRACK_MAX) {
1083 DEBUGP("ip_ct_tcp: invalid new deleting.\n");
1084 return 0;
1085 }
1086
1087 if (new_state == TCP_CONNTRACK_SYN_SENT) {
1088 /* SYN packet */
1089 conntrack->proto.tcp.seen[0].td_end =
1090 segment_seq_plus_len(ntohl(th->seq), skb->len,
1091 iph, th);
1092 conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1093 if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
1094 conntrack->proto.tcp.seen[0].td_maxwin = 1;
1095 conntrack->proto.tcp.seen[0].td_maxend =
1096 conntrack->proto.tcp.seen[0].td_end;
1097
1098 tcp_options(skb, iph, th, &conntrack->proto.tcp.seen[0]);
1099 conntrack->proto.tcp.seen[1].flags = 0;
1da177e4
LT
1100 } else if (ip_ct_tcp_loose == 0) {
1101 /* Don't try to pick up connections. */
1102 return 0;
1103 } else {
1104 /*
1105 * We are in the middle of a connection,
1106 * its history is lost for us.
1107 * Let's try to use the data from the packet.
1108 */
1109 conntrack->proto.tcp.seen[0].td_end =
1110 segment_seq_plus_len(ntohl(th->seq), skb->len,
1111 iph, th);
1112 conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1113 if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
1114 conntrack->proto.tcp.seen[0].td_maxwin = 1;
1115 conntrack->proto.tcp.seen[0].td_maxend =
e905a9ed 1116 conntrack->proto.tcp.seen[0].td_end +
1da177e4
LT
1117 conntrack->proto.tcp.seen[0].td_maxwin;
1118 conntrack->proto.tcp.seen[0].td_scale = 0;
1119
a09113c2
PM
1120 /* We assume SACK and liberal window checking to handle
1121 * window scaling */
1da177e4 1122 conntrack->proto.tcp.seen[0].flags =
a09113c2
PM
1123 conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
1124 IP_CT_TCP_FLAG_BE_LIBERAL;
1da177e4 1125 }
e905a9ed 1126
1da177e4
LT
1127 conntrack->proto.tcp.seen[1].td_end = 0;
1128 conntrack->proto.tcp.seen[1].td_maxend = 0;
1129 conntrack->proto.tcp.seen[1].td_maxwin = 1;
e905a9ed 1130 conntrack->proto.tcp.seen[1].td_scale = 0;
1da177e4
LT
1131
1132 /* tcp_packet will set them */
1133 conntrack->proto.tcp.state = TCP_CONNTRACK_NONE;
1134 conntrack->proto.tcp.last_index = TCP_NONE_SET;
e905a9ed 1135
1da177e4
LT
1136 DEBUGP("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
1137 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
1138 sender->td_end, sender->td_maxend, sender->td_maxwin,
e905a9ed 1139 sender->td_scale,
1da177e4
LT
1140 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
1141 receiver->td_scale);
1142 return 1;
1143}
e905a9ed 1144
1da177e4
LT
1145struct ip_conntrack_protocol ip_conntrack_protocol_tcp =
1146{
1147 .proto = IPPROTO_TCP,
1148 .name = "tcp",
1149 .pkt_to_tuple = tcp_pkt_to_tuple,
1150 .invert_tuple = tcp_invert_tuple,
1151 .print_tuple = tcp_print_tuple,
1152 .print_conntrack = tcp_print_conntrack,
1153 .packet = tcp_packet,
1154 .new = tcp_new,
1155 .error = tcp_error,
080774a2
HW
1156#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
1157 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
1158 .to_nfattr = tcp_to_nfattr,
33923153 1159 .from_nfattr = nfattr_to_tcp,
080774a2
HW
1160 .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr,
1161 .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple,
1162#endif
1da177e4 1163};