]> bbs.cooldavid.org Git - net-next-2.6.git/blob - net/netfilter/ipvs/ip_vs_proto_udp.c
Merge branch 'next-spi' of git://git.secretlab.ca/git/linux-2.6
[net-next-2.6.git] / net / netfilter / ipvs / ip_vs_proto_udp.c
1 /*
2  * ip_vs_proto_udp.c:   UDP load balancing support for IPVS
3  *
4  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
5  *              Julian Anastasov <ja@ssi.bg>
6  *
7  *              This program is free software; you can redistribute it and/or
8  *              modify it under the terms of the GNU General Public License
9  *              as published by the Free Software Foundation; either version
10  *              2 of the License, or (at your option) any later version.
11  *
12  * Changes:
13  *
14  */
15
16 #define KMSG_COMPONENT "IPVS"
17 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
18
19 #include <linux/in.h>
20 #include <linux/ip.h>
21 #include <linux/kernel.h>
22 #include <linux/netfilter.h>
23 #include <linux/netfilter_ipv4.h>
24 #include <linux/udp.h>
25
26 #include <net/ip_vs.h>
27 #include <net/ip.h>
28 #include <net/ip6_checksum.h>
29
30 static int
31 udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
32                   int *verdict, struct ip_vs_conn **cpp)
33 {
34         struct ip_vs_service *svc;
35         struct udphdr _udph, *uh;
36         struct ip_vs_iphdr iph;
37
38         ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
39
40         uh = skb_header_pointer(skb, iph.len, sizeof(_udph), &_udph);
41         if (uh == NULL) {
42                 *verdict = NF_DROP;
43                 return 0;
44         }
45
46         svc = ip_vs_service_get(af, skb->mark, iph.protocol,
47                                 &iph.daddr, uh->dest);
48         if (svc) {
49                 if (ip_vs_todrop()) {
50                         /*
51                          * It seems that we are very loaded.
52                          * We have to drop this packet :(
53                          */
54                         ip_vs_service_put(svc);
55                         *verdict = NF_DROP;
56                         return 0;
57                 }
58
59                 /*
60                  * Let the virtual server select a real server for the
61                  * incoming connection, and create a connection entry.
62                  */
63                 *cpp = ip_vs_schedule(svc, skb);
64                 if (!*cpp) {
65                         *verdict = ip_vs_leave(svc, skb, pp);
66                         return 0;
67                 }
68                 ip_vs_service_put(svc);
69         }
70         return 1;
71 }
72
73
74 static inline void
75 udp_fast_csum_update(int af, struct udphdr *uhdr,
76                      const union nf_inet_addr *oldip,
77                      const union nf_inet_addr *newip,
78                      __be16 oldport, __be16 newport)
79 {
80 #ifdef CONFIG_IP_VS_IPV6
81         if (af == AF_INET6)
82                 uhdr->check =
83                         csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
84                                          ip_vs_check_diff2(oldport, newport,
85                                                 ~csum_unfold(uhdr->check))));
86         else
87 #endif
88                 uhdr->check =
89                         csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
90                                          ip_vs_check_diff2(oldport, newport,
91                                                 ~csum_unfold(uhdr->check))));
92         if (!uhdr->check)
93                 uhdr->check = CSUM_MANGLED_0;
94 }
95
96 static inline void
97 udp_partial_csum_update(int af, struct udphdr *uhdr,
98                      const union nf_inet_addr *oldip,
99                      const union nf_inet_addr *newip,
100                      __be16 oldlen, __be16 newlen)
101 {
102 #ifdef CONFIG_IP_VS_IPV6
103         if (af == AF_INET6)
104                 uhdr->check =
105                         csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
106                                          ip_vs_check_diff2(oldlen, newlen,
107                                                 ~csum_unfold(uhdr->check))));
108         else
109 #endif
110         uhdr->check =
111                 csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
112                                 ip_vs_check_diff2(oldlen, newlen,
113                                                 ~csum_unfold(uhdr->check))));
114 }
115
116
117 static int
118 udp_snat_handler(struct sk_buff *skb,
119                  struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
120 {
121         struct udphdr *udph;
122         unsigned int udphoff;
123         int oldlen;
124
125 #ifdef CONFIG_IP_VS_IPV6
126         if (cp->af == AF_INET6)
127                 udphoff = sizeof(struct ipv6hdr);
128         else
129 #endif
130                 udphoff = ip_hdrlen(skb);
131         oldlen = skb->len - udphoff;
132
133         /* csum_check requires unshared skb */
134         if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
135                 return 0;
136
137         if (unlikely(cp->app != NULL)) {
138                 /* Some checks before mangling */
139                 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
140                         return 0;
141
142                 /*
143                  *      Call application helper if needed
144                  */
145                 if (!ip_vs_app_pkt_out(cp, skb))
146                         return 0;
147         }
148
149         udph = (void *)skb_network_header(skb) + udphoff;
150         udph->source = cp->vport;
151
152         /*
153          *      Adjust UDP checksums
154          */
155         if (skb->ip_summed == CHECKSUM_PARTIAL) {
156                 udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
157                                         htons(oldlen),
158                                         htons(skb->len - udphoff));
159         } else if (!cp->app && (udph->check != 0)) {
160                 /* Only port and addr are changed, do fast csum update */
161                 udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
162                                      cp->dport, cp->vport);
163                 if (skb->ip_summed == CHECKSUM_COMPLETE)
164                         skb->ip_summed = CHECKSUM_NONE;
165         } else {
166                 /* full checksum calculation */
167                 udph->check = 0;
168                 skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
169 #ifdef CONFIG_IP_VS_IPV6
170                 if (cp->af == AF_INET6)
171                         udph->check = csum_ipv6_magic(&cp->vaddr.in6,
172                                                       &cp->caddr.in6,
173                                                       skb->len - udphoff,
174                                                       cp->protocol, skb->csum);
175                 else
176 #endif
177                         udph->check = csum_tcpudp_magic(cp->vaddr.ip,
178                                                         cp->caddr.ip,
179                                                         skb->len - udphoff,
180                                                         cp->protocol,
181                                                         skb->csum);
182                 if (udph->check == 0)
183                         udph->check = CSUM_MANGLED_0;
184                 IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
185                           pp->name, udph->check,
186                           (char*)&(udph->check) - (char*)udph);
187         }
188         return 1;
189 }
190
191
192 static int
193 udp_dnat_handler(struct sk_buff *skb,
194                  struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
195 {
196         struct udphdr *udph;
197         unsigned int udphoff;
198         int oldlen;
199
200 #ifdef CONFIG_IP_VS_IPV6
201         if (cp->af == AF_INET6)
202                 udphoff = sizeof(struct ipv6hdr);
203         else
204 #endif
205                 udphoff = ip_hdrlen(skb);
206         oldlen = skb->len - udphoff;
207
208         /* csum_check requires unshared skb */
209         if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
210                 return 0;
211
212         if (unlikely(cp->app != NULL)) {
213                 /* Some checks before mangling */
214                 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
215                         return 0;
216
217                 /*
218                  *      Attempt ip_vs_app call.
219                  *      It will fix ip_vs_conn
220                  */
221                 if (!ip_vs_app_pkt_in(cp, skb))
222                         return 0;
223         }
224
225         udph = (void *)skb_network_header(skb) + udphoff;
226         udph->dest = cp->dport;
227
228         /*
229          *      Adjust UDP checksums
230          */
231         if (skb->ip_summed == CHECKSUM_PARTIAL) {
232                 udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
233                                         htons(oldlen),
234                                         htons(skb->len - udphoff));
235         } else if (!cp->app && (udph->check != 0)) {
236                 /* Only port and addr are changed, do fast csum update */
237                 udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr,
238                                      cp->vport, cp->dport);
239                 if (skb->ip_summed == CHECKSUM_COMPLETE)
240                         skb->ip_summed = CHECKSUM_NONE;
241         } else {
242                 /* full checksum calculation */
243                 udph->check = 0;
244                 skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
245 #ifdef CONFIG_IP_VS_IPV6
246                 if (cp->af == AF_INET6)
247                         udph->check = csum_ipv6_magic(&cp->caddr.in6,
248                                                       &cp->daddr.in6,
249                                                       skb->len - udphoff,
250                                                       cp->protocol, skb->csum);
251                 else
252 #endif
253                         udph->check = csum_tcpudp_magic(cp->caddr.ip,
254                                                         cp->daddr.ip,
255                                                         skb->len - udphoff,
256                                                         cp->protocol,
257                                                         skb->csum);
258                 if (udph->check == 0)
259                         udph->check = CSUM_MANGLED_0;
260                 skb->ip_summed = CHECKSUM_UNNECESSARY;
261         }
262         return 1;
263 }
264
265
266 static int
267 udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
268 {
269         struct udphdr _udph, *uh;
270         unsigned int udphoff;
271
272 #ifdef CONFIG_IP_VS_IPV6
273         if (af == AF_INET6)
274                 udphoff = sizeof(struct ipv6hdr);
275         else
276 #endif
277                 udphoff = ip_hdrlen(skb);
278
279         uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph);
280         if (uh == NULL)
281                 return 0;
282
283         if (uh->check != 0) {
284                 switch (skb->ip_summed) {
285                 case CHECKSUM_NONE:
286                         skb->csum = skb_checksum(skb, udphoff,
287                                                  skb->len - udphoff, 0);
288                 case CHECKSUM_COMPLETE:
289 #ifdef CONFIG_IP_VS_IPV6
290                         if (af == AF_INET6) {
291                                 if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
292                                                     &ipv6_hdr(skb)->daddr,
293                                                     skb->len - udphoff,
294                                                     ipv6_hdr(skb)->nexthdr,
295                                                     skb->csum)) {
296                                         IP_VS_DBG_RL_PKT(0, pp, skb, 0,
297                                                          "Failed checksum for");
298                                         return 0;
299                                 }
300                         } else
301 #endif
302                                 if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
303                                                       ip_hdr(skb)->daddr,
304                                                       skb->len - udphoff,
305                                                       ip_hdr(skb)->protocol,
306                                                       skb->csum)) {
307                                         IP_VS_DBG_RL_PKT(0, pp, skb, 0,
308                                                          "Failed checksum for");
309                                         return 0;
310                                 }
311                         break;
312                 default:
313                         /* No need to checksum. */
314                         break;
315                 }
316         }
317         return 1;
318 }
319
320
321 /*
322  *      Note: the caller guarantees that only one of register_app,
323  *      unregister_app or app_conn_bind is called each time.
324  */
325
326 #define UDP_APP_TAB_BITS        4
327 #define UDP_APP_TAB_SIZE        (1 << UDP_APP_TAB_BITS)
328 #define UDP_APP_TAB_MASK        (UDP_APP_TAB_SIZE - 1)
329
330 static struct list_head udp_apps[UDP_APP_TAB_SIZE];
331 static DEFINE_SPINLOCK(udp_app_lock);
332
333 static inline __u16 udp_app_hashkey(__be16 port)
334 {
335         return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port)
336                 & UDP_APP_TAB_MASK;
337 }
338
339
340 static int udp_register_app(struct ip_vs_app *inc)
341 {
342         struct ip_vs_app *i;
343         __u16 hash;
344         __be16 port = inc->port;
345         int ret = 0;
346
347         hash = udp_app_hashkey(port);
348
349
350         spin_lock_bh(&udp_app_lock);
351         list_for_each_entry(i, &udp_apps[hash], p_list) {
352                 if (i->port == port) {
353                         ret = -EEXIST;
354                         goto out;
355                 }
356         }
357         list_add(&inc->p_list, &udp_apps[hash]);
358         atomic_inc(&ip_vs_protocol_udp.appcnt);
359
360   out:
361         spin_unlock_bh(&udp_app_lock);
362         return ret;
363 }
364
365
366 static void
367 udp_unregister_app(struct ip_vs_app *inc)
368 {
369         spin_lock_bh(&udp_app_lock);
370         atomic_dec(&ip_vs_protocol_udp.appcnt);
371         list_del(&inc->p_list);
372         spin_unlock_bh(&udp_app_lock);
373 }
374
375
376 static int udp_app_conn_bind(struct ip_vs_conn *cp)
377 {
378         int hash;
379         struct ip_vs_app *inc;
380         int result = 0;
381
382         /* Default binding: bind app only for NAT */
383         if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
384                 return 0;
385
386         /* Lookup application incarnations and bind the right one */
387         hash = udp_app_hashkey(cp->vport);
388
389         spin_lock(&udp_app_lock);
390         list_for_each_entry(inc, &udp_apps[hash], p_list) {
391                 if (inc->port == cp->vport) {
392                         if (unlikely(!ip_vs_app_inc_get(inc)))
393                                 break;
394                         spin_unlock(&udp_app_lock);
395
396                         IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
397                                       "%s:%u to app %s on port %u\n",
398                                       __func__,
399                                       IP_VS_DBG_ADDR(cp->af, &cp->caddr),
400                                       ntohs(cp->cport),
401                                       IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
402                                       ntohs(cp->vport),
403                                       inc->name, ntohs(inc->port));
404
405                         cp->app = inc;
406                         if (inc->init_conn)
407                                 result = inc->init_conn(inc, cp);
408                         goto out;
409                 }
410         }
411         spin_unlock(&udp_app_lock);
412
413   out:
414         return result;
415 }
416
417
418 static int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
419         [IP_VS_UDP_S_NORMAL]            =       5*60*HZ,
420         [IP_VS_UDP_S_LAST]              =       2*HZ,
421 };
422
423 static const char *const udp_state_name_table[IP_VS_UDP_S_LAST+1] = {
424         [IP_VS_UDP_S_NORMAL]            =       "UDP",
425         [IP_VS_UDP_S_LAST]              =       "BUG!",
426 };
427
428
429 static int
430 udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
431 {
432         return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST,
433                                        udp_state_name_table, sname, to);
434 }
435
436 static const char * udp_state_name(int state)
437 {
438         if (state >= IP_VS_UDP_S_LAST)
439                 return "ERR!";
440         return udp_state_name_table[state] ? udp_state_name_table[state] : "?";
441 }
442
443 static int
444 udp_state_transition(struct ip_vs_conn *cp, int direction,
445                      const struct sk_buff *skb,
446                      struct ip_vs_protocol *pp)
447 {
448         cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL];
449         return 1;
450 }
451
452 static void udp_init(struct ip_vs_protocol *pp)
453 {
454         IP_VS_INIT_HASH_TABLE(udp_apps);
455         pp->timeout_table = udp_timeouts;
456 }
457
/* Protocol exit hook: UDP has nothing to tear down. */
static void udp_exit(struct ip_vs_protocol *pp)
{
        /* intentionally empty */
}
461
462
463 struct ip_vs_protocol ip_vs_protocol_udp = {
464         .name =                 "UDP",
465         .protocol =             IPPROTO_UDP,
466         .num_states =           IP_VS_UDP_S_LAST,
467         .dont_defrag =          0,
468         .init =                 udp_init,
469         .exit =                 udp_exit,
470         .conn_schedule =        udp_conn_schedule,
471         .conn_in_get =          ip_vs_conn_in_get_proto,
472         .conn_out_get =         ip_vs_conn_out_get_proto,
473         .snat_handler =         udp_snat_handler,
474         .dnat_handler =         udp_dnat_handler,
475         .csum_check =           udp_csum_check,
476         .state_transition =     udp_state_transition,
477         .state_name =           udp_state_name,
478         .register_app =         udp_register_app,
479         .unregister_app =       udp_unregister_app,
480         .app_conn_bind =        udp_app_conn_bind,
481         .debug_packet =         ip_vs_tcpudp_debug_packet,
482         .timeout_change =       NULL,
483         .set_state_timeout =    udp_set_state_timeout,
484 };