bbs.cooldavid.org Git - net-next-2.6.git/blob - net/netfilter/ipvs/ip_vs_proto_tcp.c
IPVS: Fallback if persistence engine fails
[net-next-2.6.git] / net / netfilter / ipvs / ip_vs_proto_tcp.c
1 /*
2  * ip_vs_proto_tcp.c:   TCP load balancing support for IPVS
3  *
4  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
5  *              Julian Anastasov <ja@ssi.bg>
6  *
7  *              This program is free software; you can redistribute it and/or
8  *              modify it under the terms of the GNU General Public License
9  *              as published by the Free Software Foundation; either version
10  *              2 of the License, or (at your option) any later version.
11  *
12  * Changes:
13  *
14  */
15
16 #define KMSG_COMPONENT "IPVS"
17 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
18
19 #include <linux/kernel.h>
20 #include <linux/ip.h>
21 #include <linux/tcp.h>                  /* for tcphdr */
22 #include <net/ip.h>
23 #include <net/tcp.h>                    /* for csum_tcpudp_magic */
24 #include <net/ip6_checksum.h>
25 #include <linux/netfilter.h>
26 #include <linux/netfilter_ipv4.h>
27
28 #include <net/ip_vs.h>
29
30 static int
31 tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
32                   int *verdict, struct ip_vs_conn **cpp)
33 {
34         struct ip_vs_service *svc;
35         struct tcphdr _tcph, *th;
36         struct ip_vs_iphdr iph;
37
38         ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
39
40         th = skb_header_pointer(skb, iph.len, sizeof(_tcph), &_tcph);
41         if (th == NULL) {
42                 *verdict = NF_DROP;
43                 return 0;
44         }
45
46         if (th->syn &&
47             (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr,
48                                      th->dest))) {
49                 if (ip_vs_todrop()) {
50                         /*
51                          * It seems that we are very loaded.
52                          * We have to drop this packet :(
53                          */
54                         ip_vs_service_put(svc);
55                         *verdict = NF_DROP;
56                         return 0;
57                 }
58
59                 /*
60                  * Let the virtual server select a real server for the
61                  * incoming connection, and create a connection entry.
62                  */
63                 *cpp = ip_vs_schedule(svc, skb);
64                 if (!*cpp) {
65                         *verdict = ip_vs_leave(svc, skb, pp);
66                         return 0;
67                 }
68                 ip_vs_service_put(svc);
69         }
70         return 1;
71 }
72
73
74 static inline void
75 tcp_fast_csum_update(int af, struct tcphdr *tcph,
76                      const union nf_inet_addr *oldip,
77                      const union nf_inet_addr *newip,
78                      __be16 oldport, __be16 newport)
79 {
80 #ifdef CONFIG_IP_VS_IPV6
81         if (af == AF_INET6)
82                 tcph->check =
83                         csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
84                                          ip_vs_check_diff2(oldport, newport,
85                                                 ~csum_unfold(tcph->check))));
86         else
87 #endif
88         tcph->check =
89                 csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
90                                  ip_vs_check_diff2(oldport, newport,
91                                                 ~csum_unfold(tcph->check))));
92 }
93
94
95 static inline void
96 tcp_partial_csum_update(int af, struct tcphdr *tcph,
97                      const union nf_inet_addr *oldip,
98                      const union nf_inet_addr *newip,
99                      __be16 oldlen, __be16 newlen)
100 {
101 #ifdef CONFIG_IP_VS_IPV6
102         if (af == AF_INET6)
103                 tcph->check =
104                         csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
105                                          ip_vs_check_diff2(oldlen, newlen,
106                                                 ~csum_unfold(tcph->check))));
107         else
108 #endif
109         tcph->check =
110                 csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
111                                 ip_vs_check_diff2(oldlen, newlen,
112                                                 ~csum_unfold(tcph->check))));
113 }
114
115
116 static int
117 tcp_snat_handler(struct sk_buff *skb,
118                  struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
119 {
120         struct tcphdr *tcph;
121         unsigned int tcphoff;
122         int oldlen;
123
124 #ifdef CONFIG_IP_VS_IPV6
125         if (cp->af == AF_INET6)
126                 tcphoff = sizeof(struct ipv6hdr);
127         else
128 #endif
129                 tcphoff = ip_hdrlen(skb);
130         oldlen = skb->len - tcphoff;
131
132         /* csum_check requires unshared skb */
133         if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
134                 return 0;
135
136         if (unlikely(cp->app != NULL)) {
137                 /* Some checks before mangling */
138                 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
139                         return 0;
140
141                 /* Call application helper if needed */
142                 if (!ip_vs_app_pkt_out(cp, skb))
143                         return 0;
144         }
145
146         tcph = (void *)skb_network_header(skb) + tcphoff;
147         tcph->source = cp->vport;
148
149         /* Adjust TCP checksums */
150         if (skb->ip_summed == CHECKSUM_PARTIAL) {
151                 tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
152                                         htons(oldlen),
153                                         htons(skb->len - tcphoff));
154         } else if (!cp->app) {
155                 /* Only port and addr are changed, do fast csum update */
156                 tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
157                                      cp->dport, cp->vport);
158                 if (skb->ip_summed == CHECKSUM_COMPLETE)
159                         skb->ip_summed = CHECKSUM_NONE;
160         } else {
161                 /* full checksum calculation */
162                 tcph->check = 0;
163                 skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
164 #ifdef CONFIG_IP_VS_IPV6
165                 if (cp->af == AF_INET6)
166                         tcph->check = csum_ipv6_magic(&cp->vaddr.in6,
167                                                       &cp->caddr.in6,
168                                                       skb->len - tcphoff,
169                                                       cp->protocol, skb->csum);
170                 else
171 #endif
172                         tcph->check = csum_tcpudp_magic(cp->vaddr.ip,
173                                                         cp->caddr.ip,
174                                                         skb->len - tcphoff,
175                                                         cp->protocol,
176                                                         skb->csum);
177
178                 IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
179                           pp->name, tcph->check,
180                           (char*)&(tcph->check) - (char*)tcph);
181         }
182         return 1;
183 }
184
185
186 static int
187 tcp_dnat_handler(struct sk_buff *skb,
188                  struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
189 {
190         struct tcphdr *tcph;
191         unsigned int tcphoff;
192         int oldlen;
193
194 #ifdef CONFIG_IP_VS_IPV6
195         if (cp->af == AF_INET6)
196                 tcphoff = sizeof(struct ipv6hdr);
197         else
198 #endif
199                 tcphoff = ip_hdrlen(skb);
200         oldlen = skb->len - tcphoff;
201
202         /* csum_check requires unshared skb */
203         if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
204                 return 0;
205
206         if (unlikely(cp->app != NULL)) {
207                 /* Some checks before mangling */
208                 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
209                         return 0;
210
211                 /*
212                  *      Attempt ip_vs_app call.
213                  *      It will fix ip_vs_conn and iph ack_seq stuff
214                  */
215                 if (!ip_vs_app_pkt_in(cp, skb))
216                         return 0;
217         }
218
219         tcph = (void *)skb_network_header(skb) + tcphoff;
220         tcph->dest = cp->dport;
221
222         /*
223          *      Adjust TCP checksums
224          */
225         if (skb->ip_summed == CHECKSUM_PARTIAL) {
226                 tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
227                                         htons(oldlen),
228                                         htons(skb->len - tcphoff));
229         } else if (!cp->app) {
230                 /* Only port and addr are changed, do fast csum update */
231                 tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr,
232                                      cp->vport, cp->dport);
233                 if (skb->ip_summed == CHECKSUM_COMPLETE)
234                         skb->ip_summed = CHECKSUM_NONE;
235         } else {
236                 /* full checksum calculation */
237                 tcph->check = 0;
238                 skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
239 #ifdef CONFIG_IP_VS_IPV6
240                 if (cp->af == AF_INET6)
241                         tcph->check = csum_ipv6_magic(&cp->caddr.in6,
242                                                       &cp->daddr.in6,
243                                                       skb->len - tcphoff,
244                                                       cp->protocol, skb->csum);
245                 else
246 #endif
247                         tcph->check = csum_tcpudp_magic(cp->caddr.ip,
248                                                         cp->daddr.ip,
249                                                         skb->len - tcphoff,
250                                                         cp->protocol,
251                                                         skb->csum);
252                 skb->ip_summed = CHECKSUM_UNNECESSARY;
253         }
254         return 1;
255 }
256
257
258 static int
259 tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
260 {
261         unsigned int tcphoff;
262
263 #ifdef CONFIG_IP_VS_IPV6
264         if (af == AF_INET6)
265                 tcphoff = sizeof(struct ipv6hdr);
266         else
267 #endif
268                 tcphoff = ip_hdrlen(skb);
269
270         switch (skb->ip_summed) {
271         case CHECKSUM_NONE:
272                 skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
273         case CHECKSUM_COMPLETE:
274 #ifdef CONFIG_IP_VS_IPV6
275                 if (af == AF_INET6) {
276                         if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
277                                             &ipv6_hdr(skb)->daddr,
278                                             skb->len - tcphoff,
279                                             ipv6_hdr(skb)->nexthdr,
280                                             skb->csum)) {
281                                 IP_VS_DBG_RL_PKT(0, pp, skb, 0,
282                                                  "Failed checksum for");
283                                 return 0;
284                         }
285                 } else
286 #endif
287                         if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
288                                               ip_hdr(skb)->daddr,
289                                               skb->len - tcphoff,
290                                               ip_hdr(skb)->protocol,
291                                               skb->csum)) {
292                                 IP_VS_DBG_RL_PKT(0, pp, skb, 0,
293                                                  "Failed checksum for");
294                                 return 0;
295                         }
296                 break;
297         default:
298                 /* No need to checksum. */
299                 break;
300         }
301
302         return 1;
303 }
304
305
/*
 *      Row offsets into the state transition tables. Each direction owns
 *      four consecutive rows (syn, fin, ack, rst - see tcp_state_idx()),
 *      hence the steps of 4.
 */
#define TCP_DIR_INPUT           0
#define TCP_DIR_OUTPUT          4
#define TCP_DIR_INPUT_ONLY      8

/* Maps an IP_VS_DIR_* direction to its row offset in the state tables */
static const int tcp_state_off[IP_VS_DIR_LAST] = {
        [IP_VS_DIR_INPUT]               =       TCP_DIR_INPUT,
        [IP_VS_DIR_OUTPUT]              =       TCP_DIR_OUTPUT,
        [IP_VS_DIR_INPUT_ONLY]          =       TCP_DIR_INPUT_ONLY,
};
315
/*
 *      Timeout table[state]
 *
 *      Connection timeout per TCP state, expressed as multiples of HZ.
 *      Installed as the protocol's timeout_table by ip_vs_tcp_init() and
 *      therefore not const: tcp_set_state_timeout() hands it to
 *      ip_vs_set_state_timeout().
 */
static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
        [IP_VS_TCP_S_NONE]              =       2*HZ,
        [IP_VS_TCP_S_ESTABLISHED]       =       15*60*HZ,
        [IP_VS_TCP_S_SYN_SENT]          =       2*60*HZ,
        [IP_VS_TCP_S_SYN_RECV]          =       1*60*HZ,
        [IP_VS_TCP_S_FIN_WAIT]          =       2*60*HZ,
        [IP_VS_TCP_S_TIME_WAIT]         =       2*60*HZ,
        [IP_VS_TCP_S_CLOSE]             =       10*HZ,
        [IP_VS_TCP_S_CLOSE_WAIT]        =       60*HZ,
        [IP_VS_TCP_S_LAST_ACK]          =       30*HZ,
        [IP_VS_TCP_S_LISTEN]            =       2*60*HZ,
        [IP_VS_TCP_S_SYNACK]            =       120*HZ,
        [IP_VS_TCP_S_LAST]              =       2*HZ,
};
333
/*
 *      Printable name of every TCP state, indexed by state number.
 *      Used by tcp_state_name() and passed to ip_vs_set_state_timeout()
 *      by tcp_set_state_timeout(). The IP_VS_TCP_S_LAST slot is a
 *      sentinel, not a real state.
 */
static const char *const tcp_state_name_table[IP_VS_TCP_S_LAST+1] = {
        [IP_VS_TCP_S_NONE]              =       "NONE",
        [IP_VS_TCP_S_ESTABLISHED]       =       "ESTABLISHED",
        [IP_VS_TCP_S_SYN_SENT]          =       "SYN_SENT",
        [IP_VS_TCP_S_SYN_RECV]          =       "SYN_RECV",
        [IP_VS_TCP_S_FIN_WAIT]          =       "FIN_WAIT",
        [IP_VS_TCP_S_TIME_WAIT]         =       "TIME_WAIT",
        [IP_VS_TCP_S_CLOSE]             =       "CLOSE",
        [IP_VS_TCP_S_CLOSE_WAIT]        =       "CLOSE_WAIT",
        [IP_VS_TCP_S_LAST_ACK]          =       "LAST_ACK",
        [IP_VS_TCP_S_LISTEN]            =       "LISTEN",
        [IP_VS_TCP_S_SYNACK]            =       "SYNACK",
        [IP_VS_TCP_S_LAST]              =       "BUG!",
};
348
/* Short aliases for the TCP states, to keep the state tables readable */
#define sNO IP_VS_TCP_S_NONE
#define sES IP_VS_TCP_S_ESTABLISHED
#define sSS IP_VS_TCP_S_SYN_SENT
#define sSR IP_VS_TCP_S_SYN_RECV
#define sFW IP_VS_TCP_S_FIN_WAIT
#define sTW IP_VS_TCP_S_TIME_WAIT
#define sCL IP_VS_TCP_S_CLOSE
#define sCW IP_VS_TCP_S_CLOSE_WAIT
#define sLA IP_VS_TCP_S_LAST_ACK
#define sLI IP_VS_TCP_S_LISTEN
#define sSA IP_VS_TCP_S_SYNACK

/*
 *      One row of a state transition table: for a given direction and
 *      TCP flag, next_state[current state] is the state to move to.
 */
struct tcp_states_t {
        int next_state[IP_VS_TCP_S_LAST];
};
364
365 static const char * tcp_state_name(int state)
366 {
367         if (state >= IP_VS_TCP_S_LAST)
368                 return "ERR!";
369         return tcp_state_name_table[state] ? tcp_state_name_table[state] : "?";
370 }
371
372 static struct tcp_states_t tcp_states [] = {
373 /*      INPUT */
374 /*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
375 /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
376 /*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sTW }},
377 /*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
378 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sSR }},
379
380 /*      OUTPUT */
381 /*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
382 /*syn*/ {{sSS, sES, sSS, sSR, sSS, sSS, sSS, sSS, sSS, sLI, sSR }},
383 /*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
384 /*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
385 /*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},
386
387 /*      INPUT-ONLY */
388 /*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
389 /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
390 /*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
391 /*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
392 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
393 };
394
395 static struct tcp_states_t tcp_states_dos [] = {
396 /*      INPUT */
397 /*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
398 /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSA }},
399 /*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sSA }},
400 /*ack*/ {{sCL, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI, sSA }},
401 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
402
403 /*      OUTPUT */
404 /*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
405 /*syn*/ {{sSS, sES, sSS, sSA, sSS, sSS, sSS, sSS, sSS, sLI, sSA }},
406 /*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
407 /*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
408 /*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},
409
410 /*      INPUT-ONLY */
411 /*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
412 /*syn*/ {{sSA, sES, sES, sSR, sSA, sSA, sSA, sSA, sSA, sSA, sSA }},
413 /*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
414 /*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
415 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
416 };
417
418 static struct tcp_states_t *tcp_state_table = tcp_states;
419
420
421 static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags)
422 {
423         int on = (flags & 1);           /* secure_tcp */
424
425         /*
426         ** FIXME: change secure_tcp to independent sysctl var
427         ** or make it per-service or per-app because it is valid
428         ** for most if not for all of the applications. Something
429         ** like "capabilities" (flags) for each object.
430         */
431         tcp_state_table = (on? tcp_states_dos : tcp_states);
432 }
433
434 static int
435 tcp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
436 {
437         return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_TCP_S_LAST,
438                                        tcp_state_name_table, sname, to);
439 }
440
441 static inline int tcp_state_idx(struct tcphdr *th)
442 {
443         if (th->rst)
444                 return 3;
445         if (th->syn)
446                 return 0;
447         if (th->fin)
448                 return 1;
449         if (th->ack)
450                 return 2;
451         return -1;
452 }
453
454 static inline void
455 set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
456               int direction, struct tcphdr *th)
457 {
458         int state_idx;
459         int new_state = IP_VS_TCP_S_CLOSE;
460         int state_off = tcp_state_off[direction];
461
462         /*
463          *    Update state offset to INPUT_ONLY if necessary
464          *    or delete NO_OUTPUT flag if output packet detected
465          */
466         if (cp->flags & IP_VS_CONN_F_NOOUTPUT) {
467                 if (state_off == TCP_DIR_OUTPUT)
468                         cp->flags &= ~IP_VS_CONN_F_NOOUTPUT;
469                 else
470                         state_off = TCP_DIR_INPUT_ONLY;
471         }
472
473         if ((state_idx = tcp_state_idx(th)) < 0) {
474                 IP_VS_DBG(8, "tcp_state_idx=%d!!!\n", state_idx);
475                 goto tcp_state_out;
476         }
477
478         new_state = tcp_state_table[state_off+state_idx].next_state[cp->state];
479
480   tcp_state_out:
481         if (new_state != cp->state) {
482                 struct ip_vs_dest *dest = cp->dest;
483
484                 IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->"
485                               "%s:%d state: %s->%s conn->refcnt:%d\n",
486                               pp->name,
487                               ((state_off == TCP_DIR_OUTPUT) ?
488                                "output " : "input "),
489                               th->syn ? 'S' : '.',
490                               th->fin ? 'F' : '.',
491                               th->ack ? 'A' : '.',
492                               th->rst ? 'R' : '.',
493                               IP_VS_DBG_ADDR(cp->af, &cp->daddr),
494                               ntohs(cp->dport),
495                               IP_VS_DBG_ADDR(cp->af, &cp->caddr),
496                               ntohs(cp->cport),
497                               tcp_state_name(cp->state),
498                               tcp_state_name(new_state),
499                               atomic_read(&cp->refcnt));
500
501                 if (dest) {
502                         if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
503                             (new_state != IP_VS_TCP_S_ESTABLISHED)) {
504                                 atomic_dec(&dest->activeconns);
505                                 atomic_inc(&dest->inactconns);
506                                 cp->flags |= IP_VS_CONN_F_INACTIVE;
507                         } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
508                                    (new_state == IP_VS_TCP_S_ESTABLISHED)) {
509                                 atomic_inc(&dest->activeconns);
510                                 atomic_dec(&dest->inactconns);
511                                 cp->flags &= ~IP_VS_CONN_F_INACTIVE;
512                         }
513                 }
514         }
515
516         cp->timeout = pp->timeout_table[cp->state = new_state];
517 }
518
519
520 /*
521  *      Handle state transitions
522  */
523 static int
524 tcp_state_transition(struct ip_vs_conn *cp, int direction,
525                      const struct sk_buff *skb,
526                      struct ip_vs_protocol *pp)
527 {
528         struct tcphdr _tcph, *th;
529
530 #ifdef CONFIG_IP_VS_IPV6
531         int ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr);
532 #else
533         int ihl = ip_hdrlen(skb);
534 #endif
535
536         th = skb_header_pointer(skb, ihl, sizeof(_tcph), &_tcph);
537         if (th == NULL)
538                 return 0;
539
540         spin_lock(&cp->lock);
541         set_tcp_state(pp, cp, direction, th);
542         spin_unlock(&cp->lock);
543
544         return 1;
545 }
546
547
/*
 *      Hash table for TCP application incarnations
 *
 *      16 buckets (1 << TCP_APP_TAB_BITS), indexed by tcp_app_hashkey()
 *      of the application port. All accesses to the buckets in this file
 *      are made with tcp_app_lock held.
 */
#define TCP_APP_TAB_BITS        4
#define TCP_APP_TAB_SIZE        (1 << TCP_APP_TAB_BITS)
#define TCP_APP_TAB_MASK        (TCP_APP_TAB_SIZE - 1)

static struct list_head tcp_apps[TCP_APP_TAB_SIZE];
static DEFINE_SPINLOCK(tcp_app_lock);
557
558 static inline __u16 tcp_app_hashkey(__be16 port)
559 {
560         return (((__force u16)port >> TCP_APP_TAB_BITS) ^ (__force u16)port)
561                 & TCP_APP_TAB_MASK;
562 }
563
564
565 static int tcp_register_app(struct ip_vs_app *inc)
566 {
567         struct ip_vs_app *i;
568         __u16 hash;
569         __be16 port = inc->port;
570         int ret = 0;
571
572         hash = tcp_app_hashkey(port);
573
574         spin_lock_bh(&tcp_app_lock);
575         list_for_each_entry(i, &tcp_apps[hash], p_list) {
576                 if (i->port == port) {
577                         ret = -EEXIST;
578                         goto out;
579                 }
580         }
581         list_add(&inc->p_list, &tcp_apps[hash]);
582         atomic_inc(&ip_vs_protocol_tcp.appcnt);
583
584   out:
585         spin_unlock_bh(&tcp_app_lock);
586         return ret;
587 }
588
589
590 static void
591 tcp_unregister_app(struct ip_vs_app *inc)
592 {
593         spin_lock_bh(&tcp_app_lock);
594         atomic_dec(&ip_vs_protocol_tcp.appcnt);
595         list_del(&inc->p_list);
596         spin_unlock_bh(&tcp_app_lock);
597 }
598
599
600 static int
601 tcp_app_conn_bind(struct ip_vs_conn *cp)
602 {
603         int hash;
604         struct ip_vs_app *inc;
605         int result = 0;
606
607         /* Default binding: bind app only for NAT */
608         if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
609                 return 0;
610
611         /* Lookup application incarnations and bind the right one */
612         hash = tcp_app_hashkey(cp->vport);
613
614         spin_lock(&tcp_app_lock);
615         list_for_each_entry(inc, &tcp_apps[hash], p_list) {
616                 if (inc->port == cp->vport) {
617                         if (unlikely(!ip_vs_app_inc_get(inc)))
618                                 break;
619                         spin_unlock(&tcp_app_lock);
620
621                         IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
622                                       "%s:%u to app %s on port %u\n",
623                                       __func__,
624                                       IP_VS_DBG_ADDR(cp->af, &cp->caddr),
625                                       ntohs(cp->cport),
626                                       IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
627                                       ntohs(cp->vport),
628                                       inc->name, ntohs(inc->port));
629
630                         cp->app = inc;
631                         if (inc->init_conn)
632                                 result = inc->init_conn(inc, cp);
633                         goto out;
634                 }
635         }
636         spin_unlock(&tcp_app_lock);
637
638   out:
639         return result;
640 }
641
642
643 /*
644  *      Set LISTEN timeout. (ip_vs_conn_put will setup timer)
645  */
646 void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp)
647 {
648         spin_lock(&cp->lock);
649         cp->state = IP_VS_TCP_S_LISTEN;
650         cp->timeout = ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_LISTEN];
651         spin_unlock(&cp->lock);
652 }
653
654
655 static void ip_vs_tcp_init(struct ip_vs_protocol *pp)
656 {
657         IP_VS_INIT_HASH_TABLE(tcp_apps);
658         pp->timeout_table = tcp_timeouts;
659 }
660
661
/* Protocol exit hook: this function performs no work. */
static void ip_vs_tcp_exit(struct ip_vs_protocol *pp)
{
}
665
666
/*
 *      TCP protocol descriptor: ties the handlers defined in this file
 *      (plus the ip_vs_conn_*_get_proto and ip_vs_tcpudp_debug_packet
 *      helpers, which are defined elsewhere) to IPPROTO_TCP.
 *      timeout_table is not set here; ip_vs_tcp_init() fills it in.
 */
struct ip_vs_protocol ip_vs_protocol_tcp = {
        .name =                 "TCP",
        .protocol =             IPPROTO_TCP,
        .num_states =           IP_VS_TCP_S_LAST,
        .dont_defrag =          0,
        .appcnt =               ATOMIC_INIT(0),
        .init =                 ip_vs_tcp_init,
        .exit =                 ip_vs_tcp_exit,
        .register_app =         tcp_register_app,
        .unregister_app =       tcp_unregister_app,
        .conn_schedule =        tcp_conn_schedule,
        .conn_in_get =          ip_vs_conn_in_get_proto,
        .conn_out_get =         ip_vs_conn_out_get_proto,
        .snat_handler =         tcp_snat_handler,
        .dnat_handler =         tcp_dnat_handler,
        .csum_check =           tcp_csum_check,
        .state_name =           tcp_state_name,
        .state_transition =     tcp_state_transition,
        .app_conn_bind =        tcp_app_conn_bind,
        .debug_packet =         ip_vs_tcpudp_debug_packet,
        .timeout_change =       tcp_timeout_change,
        .set_state_timeout =    tcp_set_state_timeout,
};