]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/ipvs/ip_vs_ctl.c
IPVS: Activate IPv6 Netfilter hooks
[net-next-2.6.git] / net / ipv4 / ipvs / ip_vs_ctl.c
CommitLineData
1da177e4
LT
1/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
1da177e4
LT
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
21#include <linux/module.h>
22#include <linux/init.h>
23#include <linux/types.h>
4fc268d2 24#include <linux/capability.h>
1da177e4
LT
25#include <linux/fs.h>
26#include <linux/sysctl.h>
27#include <linux/proc_fs.h>
28#include <linux/workqueue.h>
29#include <linux/swap.h>
1da177e4
LT
30#include <linux/seq_file.h>
31
32#include <linux/netfilter.h>
33#include <linux/netfilter_ipv4.h>
14cc3e2b 34#include <linux/mutex.h>
1da177e4 35
457c4cbc 36#include <net/net_namespace.h>
1da177e4 37#include <net/ip.h>
09571c7a
VB
38#ifdef CONFIG_IP_VS_IPV6
39#include <net/ipv6.h>
40#include <net/ip6_route.h>
41#endif
14c85021 42#include <net/route.h>
1da177e4 43#include <net/sock.h>
9a812198 44#include <net/genetlink.h>
1da177e4
LT
45
46#include <asm/uaccess.h>
47
48#include <net/ip_vs.h>
49
50/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
14cc3e2b 51static DEFINE_MUTEX(__ip_vs_mutex);
1da177e4
LT
52
53/* lock for service table */
54static DEFINE_RWLOCK(__ip_vs_svc_lock);
55
56/* lock for table with the real services */
57static DEFINE_RWLOCK(__ip_vs_rs_lock);
58
59/* lock for state and timeout tables */
60static DEFINE_RWLOCK(__ip_vs_securetcp_lock);
61
62/* lock for drop entry handling */
63static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
64
65/* lock for drop packet handling */
66static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
67
68/* 1/rate drop and drop-entry variables */
69int ip_vs_drop_rate = 0;
70int ip_vs_drop_counter = 0;
71static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
72
73/* number of virtual services */
74static int ip_vs_num_services = 0;
75
76/* sysctl variables */
77static int sysctl_ip_vs_drop_entry = 0;
78static int sysctl_ip_vs_drop_packet = 0;
79static int sysctl_ip_vs_secure_tcp = 0;
80static int sysctl_ip_vs_amemthresh = 1024;
81static int sysctl_ip_vs_am_droprate = 10;
82int sysctl_ip_vs_cache_bypass = 0;
83int sysctl_ip_vs_expire_nodest_conn = 0;
84int sysctl_ip_vs_expire_quiescent_template = 0;
85int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
86int sysctl_ip_vs_nat_icmp_send = 0;
87
88
#ifdef CONFIG_IP_VS_DEBUG
/* Debug verbosity; only present in CONFIG_IP_VS_DEBUG builds. */
static int sysctl_ip_vs_debug_level;

/* Return the current value of sysctl_ip_vs_debug_level. */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
#endif /* CONFIG_IP_VS_DEBUG */
97
09571c7a
VB
#ifdef CONFIG_IP_VS_IPV6
/*
 * Check whether an IPv6 address is local to this host.
 *
 * Does an output route lookup in init_net with @addr as destination and an
 * all-zero source, and treats the address as local when the device of the
 * resulting route has IFF_LOOPBACK set.
 * Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way?
 *
 * Returns 1 if the address is local, 0 otherwise.
 */
static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
{
	struct dst_entry *dst;
	struct flowi fl = {
		.oif = 0,
		.nl_u = {
			.ip6_u = {
				.daddr = *addr,
				.saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
	};
	int is_local;

	dst = ip6_route_output(&init_net, NULL, &fl);
	if (!dst)
		return 0;

	is_local = dst->dev && (dst->dev->flags & IFF_LOOPBACK);

	/*
	 * The lookup result carries a reference that the caller owns;
	 * drop it, otherwise every call leaks one route entry reference.
	 */
	dst_release(dst);

	return is_local;
}
#endif
1da177e4 118/*
af9debd4
JA
119 * update_defense_level is called from keventd and from sysctl,
120 * so it needs to protect itself from softirqs
1da177e4
LT
121 */
122static void update_defense_level(void)
123{
124 struct sysinfo i;
125 static int old_secure_tcp = 0;
126 int availmem;
127 int nomem;
128 int to_change = -1;
129
130 /* we only count free and buffered memory (in pages) */
131 si_meminfo(&i);
132 availmem = i.freeram + i.bufferram;
133 /* however in linux 2.5 the i.bufferram is total page cache size,
134 we need adjust it */
135 /* si_swapinfo(&i); */
136 /* availmem = availmem - (i.totalswap - i.freeswap); */
137
138 nomem = (availmem < sysctl_ip_vs_amemthresh);
139
af9debd4
JA
140 local_bh_disable();
141
1da177e4
LT
142 /* drop_entry */
143 spin_lock(&__ip_vs_dropentry_lock);
144 switch (sysctl_ip_vs_drop_entry) {
145 case 0:
146 atomic_set(&ip_vs_dropentry, 0);
147 break;
148 case 1:
149 if (nomem) {
150 atomic_set(&ip_vs_dropentry, 1);
151 sysctl_ip_vs_drop_entry = 2;
152 } else {
153 atomic_set(&ip_vs_dropentry, 0);
154 }
155 break;
156 case 2:
157 if (nomem) {
158 atomic_set(&ip_vs_dropentry, 1);
159 } else {
160 atomic_set(&ip_vs_dropentry, 0);
161 sysctl_ip_vs_drop_entry = 1;
162 };
163 break;
164 case 3:
165 atomic_set(&ip_vs_dropentry, 1);
166 break;
167 }
168 spin_unlock(&__ip_vs_dropentry_lock);
169
170 /* drop_packet */
171 spin_lock(&__ip_vs_droppacket_lock);
172 switch (sysctl_ip_vs_drop_packet) {
173 case 0:
174 ip_vs_drop_rate = 0;
175 break;
176 case 1:
177 if (nomem) {
178 ip_vs_drop_rate = ip_vs_drop_counter
179 = sysctl_ip_vs_amemthresh /
180 (sysctl_ip_vs_amemthresh-availmem);
181 sysctl_ip_vs_drop_packet = 2;
182 } else {
183 ip_vs_drop_rate = 0;
184 }
185 break;
186 case 2:
187 if (nomem) {
188 ip_vs_drop_rate = ip_vs_drop_counter
189 = sysctl_ip_vs_amemthresh /
190 (sysctl_ip_vs_amemthresh-availmem);
191 } else {
192 ip_vs_drop_rate = 0;
193 sysctl_ip_vs_drop_packet = 1;
194 }
195 break;
196 case 3:
197 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
198 break;
199 }
200 spin_unlock(&__ip_vs_droppacket_lock);
201
202 /* secure_tcp */
203 write_lock(&__ip_vs_securetcp_lock);
204 switch (sysctl_ip_vs_secure_tcp) {
205 case 0:
206 if (old_secure_tcp >= 2)
207 to_change = 0;
208 break;
209 case 1:
210 if (nomem) {
211 if (old_secure_tcp < 2)
212 to_change = 1;
213 sysctl_ip_vs_secure_tcp = 2;
214 } else {
215 if (old_secure_tcp >= 2)
216 to_change = 0;
217 }
218 break;
219 case 2:
220 if (nomem) {
221 if (old_secure_tcp < 2)
222 to_change = 1;
223 } else {
224 if (old_secure_tcp >= 2)
225 to_change = 0;
226 sysctl_ip_vs_secure_tcp = 1;
227 }
228 break;
229 case 3:
230 if (old_secure_tcp < 2)
231 to_change = 1;
232 break;
233 }
234 old_secure_tcp = sysctl_ip_vs_secure_tcp;
235 if (to_change >= 0)
236 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
237 write_unlock(&__ip_vs_securetcp_lock);
af9debd4
JA
238
239 local_bh_enable();
1da177e4
LT
240}
241
242
243/*
244 * Timer for checking the defense
245 */
246#define DEFENSE_TIMER_PERIOD 1*HZ
c4028958
DH
247static void defense_work_handler(struct work_struct *work);
248static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
1da177e4 249
c4028958 250static void defense_work_handler(struct work_struct *work)
1da177e4
LT
251{
252 update_defense_level();
253 if (atomic_read(&ip_vs_dropentry))
254 ip_vs_random_dropentry();
255
256 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
257}
258
259int
260ip_vs_use_count_inc(void)
261{
262 return try_module_get(THIS_MODULE);
263}
264
265void
266ip_vs_use_count_dec(void)
267{
268 module_put(THIS_MODULE);
269}
270
271
272/*
273 * Hash table: for virtual service lookups
274 */
275#define IP_VS_SVC_TAB_BITS 8
276#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
277#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
278
279/* the service table hashed by <protocol, addr, port> */
280static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
281/* the service table hashed by fwmark */
282static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
283
284/*
285 * Hash table: for real service lookups
286 */
287#define IP_VS_RTAB_BITS 4
288#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
289#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
290
291static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
292
293/*
294 * Trash for destinations
295 */
296static LIST_HEAD(ip_vs_dest_trash);
297
298/*
299 * FTP & NULL virtual service counters
300 */
301static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
302static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
303
304
305/*
306 * Returns hash value for virtual service
307 */
308static __inline__ unsigned
b18610de
JV
309ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
310 __be16 port)
1da177e4
LT
311{
312 register unsigned porth = ntohs(port);
b18610de 313 __be32 addr_fold = addr->ip;
1da177e4 314
b18610de
JV
315#ifdef CONFIG_IP_VS_IPV6
316 if (af == AF_INET6)
317 addr_fold = addr->ip6[0]^addr->ip6[1]^
318 addr->ip6[2]^addr->ip6[3];
319#endif
320
321 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
1da177e4
LT
322 & IP_VS_SVC_TAB_MASK;
323}
324
325/*
326 * Returns hash value of fwmark for virtual service lookup
327 */
328static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
329{
330 return fwmark & IP_VS_SVC_TAB_MASK;
331}
332
333/*
334 * Hashes a service in the ip_vs_svc_table by <proto,addr,port>
335 * or in the ip_vs_svc_fwm_table by fwmark.
336 * Should be called with locked tables.
337 */
338static int ip_vs_svc_hash(struct ip_vs_service *svc)
339{
340 unsigned hash;
341
342 if (svc->flags & IP_VS_SVC_F_HASHED) {
343 IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
344 "called from %p\n", __builtin_return_address(0));
345 return 0;
346 }
347
348 if (svc->fwmark == 0) {
349 /*
350 * Hash it by <protocol,addr,port> in ip_vs_svc_table
351 */
b18610de 352 hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
e7ade46a 353 svc->port);
1da177e4
LT
354 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
355 } else {
356 /*
357 * Hash it by fwmark in ip_vs_svc_fwm_table
358 */
359 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
360 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
361 }
362
363 svc->flags |= IP_VS_SVC_F_HASHED;
364 /* increase its refcnt because it is referenced by the svc table */
365 atomic_inc(&svc->refcnt);
366 return 1;
367}
368
369
370/*
371 * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
372 * Should be called with locked tables.
373 */
374static int ip_vs_svc_unhash(struct ip_vs_service *svc)
375{
376 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
377 IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
378 "called from %p\n", __builtin_return_address(0));
379 return 0;
380 }
381
382 if (svc->fwmark == 0) {
383 /* Remove it from the ip_vs_svc_table table */
384 list_del(&svc->s_list);
385 } else {
386 /* Remove it from the ip_vs_svc_fwm_table table */
387 list_del(&svc->f_list);
388 }
389
390 svc->flags &= ~IP_VS_SVC_F_HASHED;
391 atomic_dec(&svc->refcnt);
392 return 1;
393}
394
395
396/*
397 * Get service by {proto,addr,port} in the service table.
398 */
b18610de
JV
399static inline struct ip_vs_service *
400__ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
401 __be16 vport)
1da177e4
LT
402{
403 unsigned hash;
404 struct ip_vs_service *svc;
405
406 /* Check for "full" addressed entries */
b18610de 407 hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
1da177e4
LT
408
409 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
b18610de
JV
410 if ((svc->af == af)
411 && ip_vs_addr_equal(af, &svc->addr, vaddr)
1da177e4
LT
412 && (svc->port == vport)
413 && (svc->protocol == protocol)) {
414 /* HIT */
415 atomic_inc(&svc->usecnt);
416 return svc;
417 }
418 }
419
420 return NULL;
421}
422
423
424/*
425 * Get service by {fwmark} in the service table.
426 */
b18610de
JV
427static inline struct ip_vs_service *
428__ip_vs_svc_fwm_get(int af, __u32 fwmark)
1da177e4
LT
429{
430 unsigned hash;
431 struct ip_vs_service *svc;
432
433 /* Check for fwmark addressed entries */
434 hash = ip_vs_svc_fwm_hashkey(fwmark);
435
436 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
b18610de 437 if (svc->fwmark == fwmark && svc->af == af) {
1da177e4
LT
438 /* HIT */
439 atomic_inc(&svc->usecnt);
440 return svc;
441 }
442 }
443
444 return NULL;
445}
446
447struct ip_vs_service *
3c2e0505
JV
448ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
449 const union nf_inet_addr *vaddr, __be16 vport)
1da177e4
LT
450{
451 struct ip_vs_service *svc;
3c2e0505 452
1da177e4
LT
453 read_lock(&__ip_vs_svc_lock);
454
455 /*
456 * Check the table hashed by fwmark first
457 */
3c2e0505 458 if (fwmark && (svc = __ip_vs_svc_fwm_get(af, fwmark)))
1da177e4
LT
459 goto out;
460
461 /*
462 * Check the table hashed by <protocol,addr,port>
463 * for "full" addressed entries
464 */
3c2e0505 465 svc = __ip_vs_service_get(af, protocol, vaddr, vport);
1da177e4
LT
466
467 if (svc == NULL
468 && protocol == IPPROTO_TCP
469 && atomic_read(&ip_vs_ftpsvc_counter)
470 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
471 /*
472 * Check if ftp service entry exists, the packet
473 * might belong to FTP data connections.
474 */
3c2e0505 475 svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT);
1da177e4
LT
476 }
477
478 if (svc == NULL
479 && atomic_read(&ip_vs_nullsvc_counter)) {
480 /*
481 * Check if the catch-all port (port zero) exists
482 */
3c2e0505 483 svc = __ip_vs_service_get(af, protocol, vaddr, 0);
1da177e4
LT
484 }
485
486 out:
487 read_unlock(&__ip_vs_svc_lock);
488
3c2e0505
JV
489 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
490 fwmark, ip_vs_proto_name(protocol),
491 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
492 svc ? "hit" : "not hit");
1da177e4
LT
493
494 return svc;
495}
496
497
498static inline void
499__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
500{
501 atomic_inc(&svc->refcnt);
502 dest->svc = svc;
503}
504
505static inline void
506__ip_vs_unbind_svc(struct ip_vs_dest *dest)
507{
508 struct ip_vs_service *svc = dest->svc;
509
510 dest->svc = NULL;
511 if (atomic_dec_and_test(&svc->refcnt))
512 kfree(svc);
513}
514
515
516/*
517 * Returns hash value for real service
518 */
7937df15
JV
519static inline unsigned ip_vs_rs_hashkey(int af,
520 const union nf_inet_addr *addr,
521 __be16 port)
1da177e4
LT
522{
523 register unsigned porth = ntohs(port);
7937df15
JV
524 __be32 addr_fold = addr->ip;
525
526#ifdef CONFIG_IP_VS_IPV6
527 if (af == AF_INET6)
528 addr_fold = addr->ip6[0]^addr->ip6[1]^
529 addr->ip6[2]^addr->ip6[3];
530#endif
1da177e4 531
7937df15 532 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
1da177e4
LT
533 & IP_VS_RTAB_MASK;
534}
535
536/*
537 * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
538 * should be called with locked tables.
539 */
540static int ip_vs_rs_hash(struct ip_vs_dest *dest)
541{
542 unsigned hash;
543
544 if (!list_empty(&dest->d_list)) {
545 return 0;
546 }
547
548 /*
549 * Hash by proto,addr,port,
550 * which are the parameters of the real service.
551 */
7937df15
JV
552 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
553
1da177e4
LT
554 list_add(&dest->d_list, &ip_vs_rtable[hash]);
555
556 return 1;
557}
558
559/*
560 * UNhashes ip_vs_dest from ip_vs_rtable.
561 * should be called with locked tables.
562 */
563static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
564{
565 /*
566 * Remove it from the ip_vs_rtable table.
567 */
568 if (!list_empty(&dest->d_list)) {
569 list_del(&dest->d_list);
570 INIT_LIST_HEAD(&dest->d_list);
571 }
572
573 return 1;
574}
575
576/*
577 * Lookup real service by <proto,addr,port> in the real service table.
578 */
579struct ip_vs_dest *
7937df15
JV
580ip_vs_lookup_real_service(int af, __u16 protocol,
581 const union nf_inet_addr *daddr,
582 __be16 dport)
1da177e4
LT
583{
584 unsigned hash;
585 struct ip_vs_dest *dest;
586
587 /*
588 * Check for "full" addressed entries
589 * Return the first found entry
590 */
7937df15 591 hash = ip_vs_rs_hashkey(af, daddr, dport);
1da177e4
LT
592
593 read_lock(&__ip_vs_rs_lock);
594 list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
7937df15
JV
595 if ((dest->af == af)
596 && ip_vs_addr_equal(af, &dest->addr, daddr)
1da177e4
LT
597 && (dest->port == dport)
598 && ((dest->protocol == protocol) ||
599 dest->vfwmark)) {
600 /* HIT */
601 read_unlock(&__ip_vs_rs_lock);
602 return dest;
603 }
604 }
605 read_unlock(&__ip_vs_rs_lock);
606
607 return NULL;
608}
609
610/*
611 * Lookup destination by {addr,port} in the given service
612 */
613static struct ip_vs_dest *
7937df15
JV
614ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
615 __be16 dport)
1da177e4
LT
616{
617 struct ip_vs_dest *dest;
618
619 /*
620 * Find the destination for the given service
621 */
622 list_for_each_entry(dest, &svc->destinations, n_list) {
7937df15
JV
623 if ((dest->af == svc->af)
624 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
625 && (dest->port == dport)) {
1da177e4
LT
626 /* HIT */
627 return dest;
628 }
629 }
630
631 return NULL;
632}
633
1e356f9c
RB
634/*
635 * Find destination by {daddr,dport,vaddr,protocol}
636 * Cretaed to be used in ip_vs_process_message() in
637 * the backup synchronization daemon. It finds the
638 * destination to be bound to the received connection
639 * on the backup.
640 *
641 * ip_vs_lookup_real_service() looked promissing, but
642 * seems not working as expected.
643 */
7937df15
JV
644struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
645 __be16 dport,
646 const union nf_inet_addr *vaddr,
647 __be16 vport, __u16 protocol)
1e356f9c
RB
648{
649 struct ip_vs_dest *dest;
650 struct ip_vs_service *svc;
651
7937df15 652 svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
1e356f9c
RB
653 if (!svc)
654 return NULL;
655 dest = ip_vs_lookup_dest(svc, daddr, dport);
656 if (dest)
657 atomic_inc(&dest->refcnt);
658 ip_vs_service_put(svc);
659 return dest;
660}
1da177e4
LT
661
662/*
663 * Lookup dest by {svc,addr,port} in the destination trash.
664 * The destination trash is used to hold the destinations that are removed
665 * from the service table but are still referenced by some conn entries.
666 * The reason to add the destination trash is when the dest is temporary
667 * down (either by administrator or by monitor program), the dest can be
668 * picked back from the trash, the remaining connections to the dest can
669 * continue, and the counting information of the dest is also useful for
670 * scheduling.
671 */
672static struct ip_vs_dest *
7937df15
JV
673ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
674 __be16 dport)
1da177e4
LT
675{
676 struct ip_vs_dest *dest, *nxt;
677
678 /*
679 * Find the destination in trash
680 */
681 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
7937df15
JV
682 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
683 "dest->refcnt=%d\n",
684 dest->vfwmark,
685 IP_VS_DBG_ADDR(svc->af, &dest->addr),
686 ntohs(dest->port),
687 atomic_read(&dest->refcnt));
688 if (dest->af == svc->af &&
689 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
1da177e4
LT
690 dest->port == dport &&
691 dest->vfwmark == svc->fwmark &&
692 dest->protocol == svc->protocol &&
693 (svc->fwmark ||
7937df15 694 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
1da177e4
LT
695 dest->vport == svc->port))) {
696 /* HIT */
697 return dest;
698 }
699
700 /*
701 * Try to purge the destination from trash if not referenced
702 */
703 if (atomic_read(&dest->refcnt) == 1) {
7937df15
JV
704 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
705 "from trash\n",
706 dest->vfwmark,
707 IP_VS_DBG_ADDR(svc->af, &dest->addr),
708 ntohs(dest->port));
1da177e4
LT
709 list_del(&dest->n_list);
710 ip_vs_dst_reset(dest);
711 __ip_vs_unbind_svc(dest);
712 kfree(dest);
713 }
714 }
715
716 return NULL;
717}
718
719
720/*
721 * Clean up all the destinations in the trash
722 * Called by the ip_vs_control_cleanup()
723 *
724 * When the ip_vs_control_clearup is activated by ipvs module exit,
725 * the service tables must have been flushed and all the connections
726 * are expired, and the refcnt of each destination in the trash must
727 * be 1, so we simply release them here.
728 */
729static void ip_vs_trash_cleanup(void)
730{
731 struct ip_vs_dest *dest, *nxt;
732
733 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
734 list_del(&dest->n_list);
735 ip_vs_dst_reset(dest);
736 __ip_vs_unbind_svc(dest);
737 kfree(dest);
738 }
739}
740
741
742static void
743ip_vs_zero_stats(struct ip_vs_stats *stats)
744{
745 spin_lock_bh(&stats->lock);
e93615d0
SH
746
747 stats->conns = 0;
748 stats->inpkts = 0;
749 stats->outpkts = 0;
750 stats->inbytes = 0;
751 stats->outbytes = 0;
752
753 stats->cps = 0;
754 stats->inpps = 0;
755 stats->outpps = 0;
756 stats->inbps = 0;
757 stats->outbps = 0;
758
1da177e4 759 ip_vs_zero_estimator(stats);
e93615d0 760
3a14a313 761 spin_unlock_bh(&stats->lock);
1da177e4
LT
762}
763
764/*
765 * Update a destination in the given service
766 */
767static void
768__ip_vs_update_dest(struct ip_vs_service *svc,
c860c6b1 769 struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
770{
771 int conn_flags;
772
773 /* set the weight and the flags */
774 atomic_set(&dest->weight, udest->weight);
775 conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
776
777 /* check if local node and update the flags */
09571c7a
VB
778#ifdef CONFIG_IP_VS_IPV6
779 if (svc->af == AF_INET6) {
780 if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) {
781 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
782 | IP_VS_CONN_F_LOCALNODE;
783 }
784 } else
785#endif
786 if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
787 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
788 | IP_VS_CONN_F_LOCALNODE;
789 }
1da177e4
LT
790
791 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
792 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
793 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
794 } else {
795 /*
796 * Put the real service in ip_vs_rtable if not present.
797 * For now only for NAT!
798 */
799 write_lock_bh(&__ip_vs_rs_lock);
800 ip_vs_rs_hash(dest);
801 write_unlock_bh(&__ip_vs_rs_lock);
802 }
803 atomic_set(&dest->conn_flags, conn_flags);
804
805 /* bind the service */
806 if (!dest->svc) {
807 __ip_vs_bind_svc(dest, svc);
808 } else {
809 if (dest->svc != svc) {
810 __ip_vs_unbind_svc(dest);
811 ip_vs_zero_stats(&dest->stats);
812 __ip_vs_bind_svc(dest, svc);
813 }
814 }
815
816 /* set the dest status flags */
817 dest->flags |= IP_VS_DEST_F_AVAILABLE;
818
819 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
820 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
821 dest->u_threshold = udest->u_threshold;
822 dest->l_threshold = udest->l_threshold;
823}
824
825
826/*
827 * Create a destination for the given service
828 */
829static int
c860c6b1 830ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
1da177e4
LT
831 struct ip_vs_dest **dest_p)
832{
833 struct ip_vs_dest *dest;
834 unsigned atype;
835
836 EnterFunction(2);
837
09571c7a
VB
838#ifdef CONFIG_IP_VS_IPV6
839 if (svc->af == AF_INET6) {
840 atype = ipv6_addr_type(&udest->addr.in6);
841 if (!(atype & IPV6_ADDR_UNICAST) &&
842 !__ip_vs_addr_is_local_v6(&udest->addr.in6))
843 return -EINVAL;
844 } else
845#endif
846 {
847 atype = inet_addr_type(&init_net, udest->addr.ip);
848 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
849 return -EINVAL;
850 }
1da177e4 851
0da974f4 852 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
1da177e4
LT
853 if (dest == NULL) {
854 IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
855 return -ENOMEM;
856 }
1da177e4 857
c860c6b1 858 dest->af = svc->af;
1da177e4 859 dest->protocol = svc->protocol;
c860c6b1 860 dest->vaddr = svc->addr;
1da177e4
LT
861 dest->vport = svc->port;
862 dest->vfwmark = svc->fwmark;
c860c6b1 863 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
1da177e4
LT
864 dest->port = udest->port;
865
866 atomic_set(&dest->activeconns, 0);
867 atomic_set(&dest->inactconns, 0);
868 atomic_set(&dest->persistconns, 0);
869 atomic_set(&dest->refcnt, 0);
870
871 INIT_LIST_HEAD(&dest->d_list);
872 spin_lock_init(&dest->dst_lock);
873 spin_lock_init(&dest->stats.lock);
874 __ip_vs_update_dest(svc, dest, udest);
875 ip_vs_new_estimator(&dest->stats);
876
877 *dest_p = dest;
878
879 LeaveFunction(2);
880 return 0;
881}
882
883
884/*
885 * Add a destination into an existing service
886 */
887static int
c860c6b1 888ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
889{
890 struct ip_vs_dest *dest;
c860c6b1 891 union nf_inet_addr daddr;
014d730d 892 __be16 dport = udest->port;
1da177e4
LT
893 int ret;
894
895 EnterFunction(2);
896
897 if (udest->weight < 0) {
898 IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
899 return -ERANGE;
900 }
901
902 if (udest->l_threshold > udest->u_threshold) {
903 IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than "
904 "upper threshold\n");
905 return -ERANGE;
906 }
907
c860c6b1
JV
908 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
909
1da177e4
LT
910 /*
911 * Check if the dest already exists in the list
912 */
7937df15
JV
913 dest = ip_vs_lookup_dest(svc, &daddr, dport);
914
1da177e4
LT
915 if (dest != NULL) {
916 IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
917 return -EEXIST;
918 }
919
920 /*
921 * Check if the dest already exists in the trash and
922 * is from the same service
923 */
7937df15
JV
924 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
925
1da177e4 926 if (dest != NULL) {
cfc78c5a
JV
927 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
928 "dest->refcnt=%d, service %u/%s:%u\n",
929 IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
930 atomic_read(&dest->refcnt),
931 dest->vfwmark,
932 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
933 ntohs(dest->vport));
934
1da177e4
LT
935 __ip_vs_update_dest(svc, dest, udest);
936
937 /*
938 * Get the destination from the trash
939 */
940 list_del(&dest->n_list);
941
942 ip_vs_new_estimator(&dest->stats);
943
944 write_lock_bh(&__ip_vs_svc_lock);
945
946 /*
947 * Wait until all other svc users go away.
948 */
949 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
950
951 list_add(&dest->n_list, &svc->destinations);
952 svc->num_dests++;
953
954 /* call the update_service function of its scheduler */
82dfb6f3
SW
955 if (svc->scheduler->update_service)
956 svc->scheduler->update_service(svc);
1da177e4
LT
957
958 write_unlock_bh(&__ip_vs_svc_lock);
959 return 0;
960 }
961
962 /*
963 * Allocate and initialize the dest structure
964 */
965 ret = ip_vs_new_dest(svc, udest, &dest);
966 if (ret) {
967 return ret;
968 }
969
970 /*
971 * Add the dest entry into the list
972 */
973 atomic_inc(&dest->refcnt);
974
975 write_lock_bh(&__ip_vs_svc_lock);
976
977 /*
978 * Wait until all other svc users go away.
979 */
980 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
981
982 list_add(&dest->n_list, &svc->destinations);
983 svc->num_dests++;
984
985 /* call the update_service function of its scheduler */
82dfb6f3
SW
986 if (svc->scheduler->update_service)
987 svc->scheduler->update_service(svc);
1da177e4
LT
988
989 write_unlock_bh(&__ip_vs_svc_lock);
990
991 LeaveFunction(2);
992
993 return 0;
994}
995
996
997/*
998 * Edit a destination in the given service
999 */
1000static int
c860c6b1 1001ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
1002{
1003 struct ip_vs_dest *dest;
c860c6b1 1004 union nf_inet_addr daddr;
014d730d 1005 __be16 dport = udest->port;
1da177e4
LT
1006
1007 EnterFunction(2);
1008
1009 if (udest->weight < 0) {
1010 IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
1011 return -ERANGE;
1012 }
1013
1014 if (udest->l_threshold > udest->u_threshold) {
1015 IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than "
1016 "upper threshold\n");
1017 return -ERANGE;
1018 }
1019
c860c6b1
JV
1020 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
1021
1da177e4
LT
1022 /*
1023 * Lookup the destination list
1024 */
7937df15
JV
1025 dest = ip_vs_lookup_dest(svc, &daddr, dport);
1026
1da177e4
LT
1027 if (dest == NULL) {
1028 IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
1029 return -ENOENT;
1030 }
1031
1032 __ip_vs_update_dest(svc, dest, udest);
1033
1034 write_lock_bh(&__ip_vs_svc_lock);
1035
1036 /* Wait until all other svc users go away */
cae7ca3d 1037 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1da177e4
LT
1038
1039 /* call the update_service, because server weight may be changed */
82dfb6f3
SW
1040 if (svc->scheduler->update_service)
1041 svc->scheduler->update_service(svc);
1da177e4
LT
1042
1043 write_unlock_bh(&__ip_vs_svc_lock);
1044
1045 LeaveFunction(2);
1046
1047 return 0;
1048}
1049
1050
1051/*
1052 * Delete a destination (must be already unlinked from the service)
1053 */
1054static void __ip_vs_del_dest(struct ip_vs_dest *dest)
1055{
1056 ip_vs_kill_estimator(&dest->stats);
1057
1058 /*
1059 * Remove it from the d-linked list with the real services.
1060 */
1061 write_lock_bh(&__ip_vs_rs_lock);
1062 ip_vs_rs_unhash(dest);
1063 write_unlock_bh(&__ip_vs_rs_lock);
1064
1065 /*
1066 * Decrease the refcnt of the dest, and free the dest
1067 * if nobody refers to it (refcnt=0). Otherwise, throw
1068 * the destination into the trash.
1069 */
1070 if (atomic_dec_and_test(&dest->refcnt)) {
1071 ip_vs_dst_reset(dest);
1072 /* simply decrease svc->refcnt here, let the caller check
1073 and release the service if nobody refers to it.
1074 Only user context can release destination and service,
1075 and only one user context can update virtual service at a
1076 time, so the operation here is OK */
1077 atomic_dec(&dest->svc->refcnt);
1078 kfree(dest);
1079 } else {
cfc78c5a
JV
1080 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1081 "dest->refcnt=%d\n",
1082 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1083 ntohs(dest->port),
1084 atomic_read(&dest->refcnt));
1da177e4
LT
1085 list_add(&dest->n_list, &ip_vs_dest_trash);
1086 atomic_inc(&dest->refcnt);
1087 }
1088}
1089
1090
1091/*
1092 * Unlink a destination from the given service
1093 */
1094static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1095 struct ip_vs_dest *dest,
1096 int svcupd)
1097{
1098 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1099
1100 /*
1101 * Remove it from the d-linked destination list.
1102 */
1103 list_del(&dest->n_list);
1104 svc->num_dests--;
82dfb6f3
SW
1105
1106 /*
1107 * Call the update_service function of its scheduler
1108 */
1109 if (svcupd && svc->scheduler->update_service)
1110 svc->scheduler->update_service(svc);
1da177e4
LT
1111}
1112
1113
1114/*
1115 * Delete a destination server in the given service
1116 */
1117static int
c860c6b1 1118ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
1119{
1120 struct ip_vs_dest *dest;
014d730d 1121 __be16 dport = udest->port;
1da177e4
LT
1122
1123 EnterFunction(2);
1124
7937df15 1125 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
c860c6b1 1126
1da177e4
LT
1127 if (dest == NULL) {
1128 IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
1129 return -ENOENT;
1130 }
1131
1132 write_lock_bh(&__ip_vs_svc_lock);
1133
1134 /*
1135 * Wait until all other svc users go away.
1136 */
1137 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1138
1139 /*
1140 * Unlink dest from the service
1141 */
1142 __ip_vs_unlink_dest(svc, dest, 1);
1143
1144 write_unlock_bh(&__ip_vs_svc_lock);
1145
1146 /*
1147 * Delete the destination
1148 */
1149 __ip_vs_del_dest(dest);
1150
1151 LeaveFunction(2);
1152
1153 return 0;
1154}
1155
1156
1157/*
1158 * Add a service into the service hash table
1159 */
1160static int
c860c6b1
JV
1161ip_vs_add_service(struct ip_vs_service_user_kern *u,
1162 struct ip_vs_service **svc_p)
1da177e4
LT
1163{
1164 int ret = 0;
1165 struct ip_vs_scheduler *sched = NULL;
1166 struct ip_vs_service *svc = NULL;
1167
1168 /* increase the module use count */
1169 ip_vs_use_count_inc();
1170
1171 /* Lookup the scheduler by 'u->sched_name' */
1172 sched = ip_vs_scheduler_get(u->sched_name);
1173 if (sched == NULL) {
1174 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1175 u->sched_name);
1176 ret = -ENOENT;
1177 goto out_mod_dec;
1178 }
1179
0da974f4 1180 svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
1da177e4
LT
1181 if (svc == NULL) {
1182 IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
1183 ret = -ENOMEM;
1184 goto out_err;
1185 }
1da177e4
LT
1186
1187 /* I'm the first user of the service */
1188 atomic_set(&svc->usecnt, 1);
1189 atomic_set(&svc->refcnt, 0);
1190
c860c6b1 1191 svc->af = u->af;
1da177e4 1192 svc->protocol = u->protocol;
c860c6b1 1193 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1da177e4
LT
1194 svc->port = u->port;
1195 svc->fwmark = u->fwmark;
1196 svc->flags = u->flags;
1197 svc->timeout = u->timeout * HZ;
1198 svc->netmask = u->netmask;
1199
1200 INIT_LIST_HEAD(&svc->destinations);
1201 rwlock_init(&svc->sched_lock);
1202 spin_lock_init(&svc->stats.lock);
1203
1204 /* Bind the scheduler */
1205 ret = ip_vs_bind_scheduler(svc, sched);
1206 if (ret)
1207 goto out_err;
1208 sched = NULL;
1209
1210 /* Update the virtual service counters */
1211 if (svc->port == FTPPORT)
1212 atomic_inc(&ip_vs_ftpsvc_counter);
1213 else if (svc->port == 0)
1214 atomic_inc(&ip_vs_nullsvc_counter);
1215
1216 ip_vs_new_estimator(&svc->stats);
1217 ip_vs_num_services++;
1218
1219 /* Hash the service into the service table */
1220 write_lock_bh(&__ip_vs_svc_lock);
1221 ip_vs_svc_hash(svc);
1222 write_unlock_bh(&__ip_vs_svc_lock);
1223
1224 *svc_p = svc;
1225 return 0;
1226
1227 out_err:
1228 if (svc != NULL) {
1229 if (svc->scheduler)
1230 ip_vs_unbind_scheduler(svc);
1231 if (svc->inc) {
1232 local_bh_disable();
1233 ip_vs_app_inc_put(svc->inc);
1234 local_bh_enable();
1235 }
1236 kfree(svc);
1237 }
1238 ip_vs_scheduler_put(sched);
1239
1240 out_mod_dec:
1241 /* decrease the module use count */
1242 ip_vs_use_count_dec();
1243
1244 return ret;
1245}
1246
1247
1248/*
1249 * Edit a service and bind it with a new scheduler
1250 */
1251static int
c860c6b1 1252ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1da177e4
LT
1253{
1254 struct ip_vs_scheduler *sched, *old_sched;
1255 int ret = 0;
1256
1257 /*
1258 * Lookup the scheduler, by 'u->sched_name'
1259 */
1260 sched = ip_vs_scheduler_get(u->sched_name);
1261 if (sched == NULL) {
1262 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1263 u->sched_name);
1264 return -ENOENT;
1265 }
1266 old_sched = sched;
1267
1268 write_lock_bh(&__ip_vs_svc_lock);
1269
1270 /*
1271 * Wait until all other svc users go away.
1272 */
1273 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1274
1275 /*
1276 * Set the flags and timeout value
1277 */
1278 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1279 svc->timeout = u->timeout * HZ;
1280 svc->netmask = u->netmask;
1281
1282 old_sched = svc->scheduler;
1283 if (sched != old_sched) {
1284 /*
1285 * Unbind the old scheduler
1286 */
1287 if ((ret = ip_vs_unbind_scheduler(svc))) {
1288 old_sched = sched;
1289 goto out;
1290 }
1291
1292 /*
1293 * Bind the new scheduler
1294 */
1295 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1296 /*
1297 * If ip_vs_bind_scheduler fails, restore the old
1298 * scheduler.
1299 * The main reason of failure is out of memory.
1300 *
1301 * The question is if the old scheduler can be
1302 * restored all the time. TODO: if it cannot be
1303 * restored some time, we must delete the service,
1304 * otherwise the system may crash.
1305 */
1306 ip_vs_bind_scheduler(svc, old_sched);
1307 old_sched = sched;
1308 goto out;
1309 }
1310 }
1311
1312 out:
1313 write_unlock_bh(&__ip_vs_svc_lock);
1314
1315 if (old_sched)
1316 ip_vs_scheduler_put(old_sched);
1317
1318 return ret;
1319}
1320
1321
1322/*
1323 * Delete a service from the service list
1324 * - The service must be unlinked, unlocked and not referenced!
1325 * - We are called under _bh lock
1326 */
1327static void __ip_vs_del_service(struct ip_vs_service *svc)
1328{
1329 struct ip_vs_dest *dest, *nxt;
1330 struct ip_vs_scheduler *old_sched;
1331
1332 ip_vs_num_services--;
1333 ip_vs_kill_estimator(&svc->stats);
1334
1335 /* Unbind scheduler */
1336 old_sched = svc->scheduler;
1337 ip_vs_unbind_scheduler(svc);
1338 if (old_sched)
1339 ip_vs_scheduler_put(old_sched);
1340
1341 /* Unbind app inc */
1342 if (svc->inc) {
1343 ip_vs_app_inc_put(svc->inc);
1344 svc->inc = NULL;
1345 }
1346
1347 /*
1348 * Unlink the whole destination list
1349 */
1350 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1351 __ip_vs_unlink_dest(svc, dest, 0);
1352 __ip_vs_del_dest(dest);
1353 }
1354
1355 /*
1356 * Update the virtual service counters
1357 */
1358 if (svc->port == FTPPORT)
1359 atomic_dec(&ip_vs_ftpsvc_counter);
1360 else if (svc->port == 0)
1361 atomic_dec(&ip_vs_nullsvc_counter);
1362
1363 /*
1364 * Free the service if nobody refers to it
1365 */
1366 if (atomic_read(&svc->refcnt) == 0)
1367 kfree(svc);
1368
1369 /* decrease the module use count */
1370 ip_vs_use_count_dec();
1371}
1372
1373/*
1374 * Delete a service from the service list
1375 */
1376static int ip_vs_del_service(struct ip_vs_service *svc)
1377{
1378 if (svc == NULL)
1379 return -EEXIST;
1380
1381 /*
1382 * Unhash it from the service table
1383 */
1384 write_lock_bh(&__ip_vs_svc_lock);
1385
1386 ip_vs_svc_unhash(svc);
1387
1388 /*
1389 * Wait until all the svc users go away.
1390 */
1391 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1392
1393 __ip_vs_del_service(svc);
1394
1395 write_unlock_bh(&__ip_vs_svc_lock);
1396
1397 return 0;
1398}
1399
1400
1401/*
1402 * Flush all the virtual services
1403 */
1404static int ip_vs_flush(void)
1405{
1406 int idx;
1407 struct ip_vs_service *svc, *nxt;
1408
1409 /*
1410 * Flush the service table hashed by <protocol,addr,port>
1411 */
1412 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1413 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1414 write_lock_bh(&__ip_vs_svc_lock);
1415 ip_vs_svc_unhash(svc);
1416 /*
1417 * Wait until all the svc users go away.
1418 */
1419 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1420 __ip_vs_del_service(svc);
1421 write_unlock_bh(&__ip_vs_svc_lock);
1422 }
1423 }
1424
1425 /*
1426 * Flush the service table hashed by fwmark
1427 */
1428 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1429 list_for_each_entry_safe(svc, nxt,
1430 &ip_vs_svc_fwm_table[idx], f_list) {
1431 write_lock_bh(&__ip_vs_svc_lock);
1432 ip_vs_svc_unhash(svc);
1433 /*
1434 * Wait until all the svc users go away.
1435 */
1436 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1437 __ip_vs_del_service(svc);
1438 write_unlock_bh(&__ip_vs_svc_lock);
1439 }
1440 }
1441
1442 return 0;
1443}
1444
1445
1446/*
1447 * Zero counters in a service or all services
1448 */
1449static int ip_vs_zero_service(struct ip_vs_service *svc)
1450{
1451 struct ip_vs_dest *dest;
1452
1453 write_lock_bh(&__ip_vs_svc_lock);
1454 list_for_each_entry(dest, &svc->destinations, n_list) {
1455 ip_vs_zero_stats(&dest->stats);
1456 }
1457 ip_vs_zero_stats(&svc->stats);
1458 write_unlock_bh(&__ip_vs_svc_lock);
1459 return 0;
1460}
1461
1462static int ip_vs_zero_all(void)
1463{
1464 int idx;
1465 struct ip_vs_service *svc;
1466
1467 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1468 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1469 ip_vs_zero_service(svc);
1470 }
1471 }
1472
1473 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1474 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1475 ip_vs_zero_service(svc);
1476 }
1477 }
1478
1479 ip_vs_zero_stats(&ip_vs_stats);
1480 return 0;
1481}
1482
1483
1484static int
1485proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
1486 void __user *buffer, size_t *lenp, loff_t *ppos)
1487{
1488 int *valp = table->data;
1489 int val = *valp;
1490 int rc;
1491
1492 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1493 if (write && (*valp != val)) {
1494 if ((*valp < 0) || (*valp > 3)) {
1495 /* Restore the correct value */
1496 *valp = val;
1497 } else {
1da177e4 1498 update_defense_level();
1da177e4
LT
1499 }
1500 }
1501 return rc;
1502}
1503
1504
1505static int
1506proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
1507 void __user *buffer, size_t *lenp, loff_t *ppos)
1508{
1509 int *valp = table->data;
1510 int val[2];
1511 int rc;
1512
1513 /* backup the value first */
1514 memcpy(val, valp, sizeof(val));
1515
1516 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1517 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1518 /* Restore the correct value */
1519 memcpy(valp, val, sizeof(val));
1520 }
1521 return rc;
1522}
1523
1524
1525/*
1526 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1527 */
1528
1529static struct ctl_table vs_vars[] = {
1530 {
1da177e4
LT
1531 .procname = "amemthresh",
1532 .data = &sysctl_ip_vs_amemthresh,
1533 .maxlen = sizeof(int),
1534 .mode = 0644,
1535 .proc_handler = &proc_dointvec,
1536 },
1537#ifdef CONFIG_IP_VS_DEBUG
1538 {
1da177e4
LT
1539 .procname = "debug_level",
1540 .data = &sysctl_ip_vs_debug_level,
1541 .maxlen = sizeof(int),
1542 .mode = 0644,
1543 .proc_handler = &proc_dointvec,
1544 },
1545#endif
1546 {
1da177e4
LT
1547 .procname = "am_droprate",
1548 .data = &sysctl_ip_vs_am_droprate,
1549 .maxlen = sizeof(int),
1550 .mode = 0644,
1551 .proc_handler = &proc_dointvec,
1552 },
1553 {
1da177e4
LT
1554 .procname = "drop_entry",
1555 .data = &sysctl_ip_vs_drop_entry,
1556 .maxlen = sizeof(int),
1557 .mode = 0644,
1558 .proc_handler = &proc_do_defense_mode,
1559 },
1560 {
1da177e4
LT
1561 .procname = "drop_packet",
1562 .data = &sysctl_ip_vs_drop_packet,
1563 .maxlen = sizeof(int),
1564 .mode = 0644,
1565 .proc_handler = &proc_do_defense_mode,
1566 },
1567 {
1da177e4
LT
1568 .procname = "secure_tcp",
1569 .data = &sysctl_ip_vs_secure_tcp,
1570 .maxlen = sizeof(int),
1571 .mode = 0644,
1572 .proc_handler = &proc_do_defense_mode,
1573 },
1574#if 0
1575 {
1da177e4
LT
1576 .procname = "timeout_established",
1577 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1578 .maxlen = sizeof(int),
1579 .mode = 0644,
1580 .proc_handler = &proc_dointvec_jiffies,
1581 },
1582 {
1da177e4
LT
1583 .procname = "timeout_synsent",
1584 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1585 .maxlen = sizeof(int),
1586 .mode = 0644,
1587 .proc_handler = &proc_dointvec_jiffies,
1588 },
1589 {
1da177e4
LT
1590 .procname = "timeout_synrecv",
1591 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1592 .maxlen = sizeof(int),
1593 .mode = 0644,
1594 .proc_handler = &proc_dointvec_jiffies,
1595 },
1596 {
1da177e4
LT
1597 .procname = "timeout_finwait",
1598 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1599 .maxlen = sizeof(int),
1600 .mode = 0644,
1601 .proc_handler = &proc_dointvec_jiffies,
1602 },
1603 {
1da177e4
LT
1604 .procname = "timeout_timewait",
1605 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1606 .maxlen = sizeof(int),
1607 .mode = 0644,
1608 .proc_handler = &proc_dointvec_jiffies,
1609 },
1610 {
1da177e4
LT
1611 .procname = "timeout_close",
1612 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1613 .maxlen = sizeof(int),
1614 .mode = 0644,
1615 .proc_handler = &proc_dointvec_jiffies,
1616 },
1617 {
1da177e4
LT
1618 .procname = "timeout_closewait",
1619 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1620 .maxlen = sizeof(int),
1621 .mode = 0644,
1622 .proc_handler = &proc_dointvec_jiffies,
1623 },
1624 {
1da177e4
LT
1625 .procname = "timeout_lastack",
1626 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1627 .maxlen = sizeof(int),
1628 .mode = 0644,
1629 .proc_handler = &proc_dointvec_jiffies,
1630 },
1631 {
1da177e4
LT
1632 .procname = "timeout_listen",
1633 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1634 .maxlen = sizeof(int),
1635 .mode = 0644,
1636 .proc_handler = &proc_dointvec_jiffies,
1637 },
1638 {
1da177e4
LT
1639 .procname = "timeout_synack",
1640 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1641 .maxlen = sizeof(int),
1642 .mode = 0644,
1643 .proc_handler = &proc_dointvec_jiffies,
1644 },
1645 {
1da177e4
LT
1646 .procname = "timeout_udp",
1647 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1648 .maxlen = sizeof(int),
1649 .mode = 0644,
1650 .proc_handler = &proc_dointvec_jiffies,
1651 },
1652 {
1da177e4
LT
1653 .procname = "timeout_icmp",
1654 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1655 .maxlen = sizeof(int),
1656 .mode = 0644,
1657 .proc_handler = &proc_dointvec_jiffies,
1658 },
1659#endif
1660 {
1da177e4
LT
1661 .procname = "cache_bypass",
1662 .data = &sysctl_ip_vs_cache_bypass,
1663 .maxlen = sizeof(int),
1664 .mode = 0644,
1665 .proc_handler = &proc_dointvec,
1666 },
1667 {
1da177e4
LT
1668 .procname = "expire_nodest_conn",
1669 .data = &sysctl_ip_vs_expire_nodest_conn,
1670 .maxlen = sizeof(int),
1671 .mode = 0644,
1672 .proc_handler = &proc_dointvec,
1673 },
1674 {
1da177e4
LT
1675 .procname = "expire_quiescent_template",
1676 .data = &sysctl_ip_vs_expire_quiescent_template,
1677 .maxlen = sizeof(int),
1678 .mode = 0644,
1679 .proc_handler = &proc_dointvec,
1680 },
1681 {
1da177e4
LT
1682 .procname = "sync_threshold",
1683 .data = &sysctl_ip_vs_sync_threshold,
1684 .maxlen = sizeof(sysctl_ip_vs_sync_threshold),
1685 .mode = 0644,
1686 .proc_handler = &proc_do_sync_threshold,
1687 },
1688 {
1da177e4
LT
1689 .procname = "nat_icmp_send",
1690 .data = &sysctl_ip_vs_nat_icmp_send,
1691 .maxlen = sizeof(int),
1692 .mode = 0644,
1693 .proc_handler = &proc_dointvec,
1694 },
1695 { .ctl_name = 0 }
1696};
1697
5587da55 1698const struct ctl_path net_vs_ctl_path[] = {
90754f8e
PE
1699 { .procname = "net", .ctl_name = CTL_NET, },
1700 { .procname = "ipv4", .ctl_name = NET_IPV4, },
1701 { .procname = "vs", },
1702 { }
1da177e4 1703};
90754f8e 1704EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1da177e4
LT
1705
1706static struct ctl_table_header * sysctl_header;
1707
1708#ifdef CONFIG_PROC_FS
1709
/* Per-open iteration state of the service listing seq_file */
struct ip_vs_iter {
	/* table being walked: ip_vs_svc_table or ip_vs_svc_fwm_table */
	struct list_head	*table;
	/* index of the current hash bucket within that table */
	int			bucket;
};
1714
1715/*
1716 * Write the contents of the VS rule table to a PROCfs file.
1717 * (It is kept just for backward compatibility)
1718 */
1719static inline const char *ip_vs_fwd_name(unsigned flags)
1720{
1721 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1722 case IP_VS_CONN_F_LOCALNODE:
1723 return "Local";
1724 case IP_VS_CONN_F_TUNNEL:
1725 return "Tunnel";
1726 case IP_VS_CONN_F_DROUTE:
1727 return "Route";
1728 default:
1729 return "Masq";
1730 }
1731}
1732
1733
1734/* Get the Nth entry in the two lists */
1735static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1736{
1737 struct ip_vs_iter *iter = seq->private;
1738 int idx;
1739 struct ip_vs_service *svc;
1740
1741 /* look in hash by protocol */
1742 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1743 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1744 if (pos-- == 0){
1745 iter->table = ip_vs_svc_table;
1746 iter->bucket = idx;
1747 return svc;
1748 }
1749 }
1750 }
1751
1752 /* keep looking in fwmark */
1753 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1754 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1755 if (pos-- == 0) {
1756 iter->table = ip_vs_svc_fwm_table;
1757 iter->bucket = idx;
1758 return svc;
1759 }
1760 }
1761 }
1762
1763 return NULL;
1764}
1765
1766static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1767{
1768
1769 read_lock_bh(&__ip_vs_svc_lock);
1770 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1771}
1772
1773
1774static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1775{
1776 struct list_head *e;
1777 struct ip_vs_iter *iter;
1778 struct ip_vs_service *svc;
1779
1780 ++*pos;
1781 if (v == SEQ_START_TOKEN)
1782 return ip_vs_info_array(seq,0);
1783
1784 svc = v;
1785 iter = seq->private;
1786
1787 if (iter->table == ip_vs_svc_table) {
1788 /* next service in table hashed by protocol */
1789 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1790 return list_entry(e, struct ip_vs_service, s_list);
1791
1792
1793 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1794 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1795 s_list) {
1796 return svc;
1797 }
1798 }
1799
1800 iter->table = ip_vs_svc_fwm_table;
1801 iter->bucket = -1;
1802 goto scan_fwmark;
1803 }
1804
1805 /* next service in hashed by fwmark */
1806 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1807 return list_entry(e, struct ip_vs_service, f_list);
1808
1809 scan_fwmark:
1810 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1811 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1812 f_list)
1813 return svc;
1814 }
1815
1816 return NULL;
1817}
1818
1819static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1820{
1821 read_unlock_bh(&__ip_vs_svc_lock);
1822}
1823
1824
1825static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1826{
1827 if (v == SEQ_START_TOKEN) {
1828 seq_printf(seq,
1829 "IP Virtual Server version %d.%d.%d (size=%d)\n",
1830 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
1831 seq_puts(seq,
1832 "Prot LocalAddress:Port Scheduler Flags\n");
1833 seq_puts(seq,
1834 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1835 } else {
1836 const struct ip_vs_service *svc = v;
1837 const struct ip_vs_iter *iter = seq->private;
1838 const struct ip_vs_dest *dest;
1839
667a5f18
VB
1840 if (iter->table == ip_vs_svc_table) {
1841#ifdef CONFIG_IP_VS_IPV6
1842 if (svc->af == AF_INET6)
1843 seq_printf(seq, "%s [" NIP6_FMT "]:%04X %s ",
1844 ip_vs_proto_name(svc->protocol),
1845 NIP6(svc->addr.in6),
1846 ntohs(svc->port),
1847 svc->scheduler->name);
1848 else
1849#endif
1850 seq_printf(seq, "%s %08X:%04X %s ",
1851 ip_vs_proto_name(svc->protocol),
1852 ntohl(svc->addr.ip),
1853 ntohs(svc->port),
1854 svc->scheduler->name);
1855 } else {
1da177e4
LT
1856 seq_printf(seq, "FWM %08X %s ",
1857 svc->fwmark, svc->scheduler->name);
667a5f18 1858 }
1da177e4
LT
1859
1860 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1861 seq_printf(seq, "persistent %d %08X\n",
1862 svc->timeout,
1863 ntohl(svc->netmask));
1864 else
1865 seq_putc(seq, '\n');
1866
1867 list_for_each_entry(dest, &svc->destinations, n_list) {
667a5f18
VB
1868#ifdef CONFIG_IP_VS_IPV6
1869 if (dest->af == AF_INET6)
1870 seq_printf(seq,
1871 " -> [" NIP6_FMT "]:%04X"
1872 " %-7s %-6d %-10d %-10d\n",
1873 NIP6(dest->addr.in6),
1874 ntohs(dest->port),
1875 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1876 atomic_read(&dest->weight),
1877 atomic_read(&dest->activeconns),
1878 atomic_read(&dest->inactconns));
1879 else
1880#endif
1881 seq_printf(seq,
1882 " -> %08X:%04X "
1883 "%-7s %-6d %-10d %-10d\n",
1884 ntohl(dest->addr.ip),
1885 ntohs(dest->port),
1886 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1887 atomic_read(&dest->weight),
1888 atomic_read(&dest->activeconns),
1889 atomic_read(&dest->inactconns));
1890
1da177e4
LT
1891 }
1892 }
1893 return 0;
1894}
1895
56b3d975 1896static const struct seq_operations ip_vs_info_seq_ops = {
1da177e4
LT
1897 .start = ip_vs_info_seq_start,
1898 .next = ip_vs_info_seq_next,
1899 .stop = ip_vs_info_seq_stop,
1900 .show = ip_vs_info_seq_show,
1901};
1902
1903static int ip_vs_info_open(struct inode *inode, struct file *file)
1904{
cf7732e4
PE
1905 return seq_open_private(file, &ip_vs_info_seq_ops,
1906 sizeof(struct ip_vs_iter));
1da177e4
LT
1907}
1908
9a32144e 1909static const struct file_operations ip_vs_info_fops = {
1da177e4
LT
1910 .owner = THIS_MODULE,
1911 .open = ip_vs_info_open,
1912 .read = seq_read,
1913 .llseek = seq_lseek,
1914 .release = seq_release_private,
1915};
1916
1917#endif
1918
519e49e8
SW
1919struct ip_vs_stats ip_vs_stats = {
1920 .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
1921};
1da177e4
LT
1922
1923#ifdef CONFIG_PROC_FS
1924static int ip_vs_stats_show(struct seq_file *seq, void *v)
1925{
1926
1927/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1928 seq_puts(seq,
1929 " Total Incoming Outgoing Incoming Outgoing\n");
1930 seq_printf(seq,
1931 " Conns Packets Packets Bytes Bytes\n");
1932
1933 spin_lock_bh(&ip_vs_stats.lock);
1934 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.conns,
1935 ip_vs_stats.inpkts, ip_vs_stats.outpkts,
1936 (unsigned long long) ip_vs_stats.inbytes,
1937 (unsigned long long) ip_vs_stats.outbytes);
1938
1939/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1940 seq_puts(seq,
1941 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1942 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
1943 ip_vs_stats.cps,
1944 ip_vs_stats.inpps,
1945 ip_vs_stats.outpps,
1946 ip_vs_stats.inbps,
1947 ip_vs_stats.outbps);
1948 spin_unlock_bh(&ip_vs_stats.lock);
1949
1950 return 0;
1951}
1952
1953static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1954{
1955 return single_open(file, ip_vs_stats_show, NULL);
1956}
1957
9a32144e 1958static const struct file_operations ip_vs_stats_fops = {
1da177e4
LT
1959 .owner = THIS_MODULE,
1960 .open = ip_vs_stats_seq_open,
1961 .read = seq_read,
1962 .llseek = seq_lseek,
1963 .release = single_release,
1964};
1965
1966#endif
1967
1968/*
1969 * Set timeout values for tcp tcpfin udp in the timeout_table.
1970 */
1971static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
1972{
1973 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
1974 u->tcp_timeout,
1975 u->tcp_fin_timeout,
1976 u->udp_timeout);
1977
1978#ifdef CONFIG_IP_VS_PROTO_TCP
1979 if (u->tcp_timeout) {
1980 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
1981 = u->tcp_timeout * HZ;
1982 }
1983
1984 if (u->tcp_fin_timeout) {
1985 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
1986 = u->tcp_fin_timeout * HZ;
1987 }
1988#endif
1989
1990#ifdef CONFIG_IP_VS_PROTO_UDP
1991 if (u->udp_timeout) {
1992 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
1993 = u->udp_timeout * HZ;
1994 }
1995#endif
1996 return 0;
1997}
1998
1999
2000#define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2001#define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user))
2002#define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \
2003 sizeof(struct ip_vs_dest_user))
2004#define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2005#define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user))
2006#define MAX_ARG_LEN SVCDEST_ARG_LEN
2007
9b5b5cff 2008static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
1da177e4
LT
2009 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
2010 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
2011 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
2012 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
2013 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
2014 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
2015 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
2016 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
2017 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
2018 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
2019 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
2020};
2021
c860c6b1
JV
2022static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2023 struct ip_vs_service_user *usvc_compat)
2024{
2025 usvc->af = AF_INET;
2026 usvc->protocol = usvc_compat->protocol;
2027 usvc->addr.ip = usvc_compat->addr;
2028 usvc->port = usvc_compat->port;
2029 usvc->fwmark = usvc_compat->fwmark;
2030
2031 /* Deep copy of sched_name is not needed here */
2032 usvc->sched_name = usvc_compat->sched_name;
2033
2034 usvc->flags = usvc_compat->flags;
2035 usvc->timeout = usvc_compat->timeout;
2036 usvc->netmask = usvc_compat->netmask;
2037}
2038
2039static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2040 struct ip_vs_dest_user *udest_compat)
2041{
2042 udest->addr.ip = udest_compat->addr;
2043 udest->port = udest_compat->port;
2044 udest->conn_flags = udest_compat->conn_flags;
2045 udest->weight = udest_compat->weight;
2046 udest->u_threshold = udest_compat->u_threshold;
2047 udest->l_threshold = udest_compat->l_threshold;
2048}
2049
1da177e4
LT
2050static int
2051do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2052{
2053 int ret;
2054 unsigned char arg[MAX_ARG_LEN];
c860c6b1
JV
2055 struct ip_vs_service_user *usvc_compat;
2056 struct ip_vs_service_user_kern usvc;
1da177e4 2057 struct ip_vs_service *svc;
c860c6b1
JV
2058 struct ip_vs_dest_user *udest_compat;
2059 struct ip_vs_dest_user_kern udest;
1da177e4
LT
2060
2061 if (!capable(CAP_NET_ADMIN))
2062 return -EPERM;
2063
2064 if (len != set_arglen[SET_CMDID(cmd)]) {
2065 IP_VS_ERR("set_ctl: len %u != %u\n",
2066 len, set_arglen[SET_CMDID(cmd)]);
2067 return -EINVAL;
2068 }
2069
2070 if (copy_from_user(arg, user, len) != 0)
2071 return -EFAULT;
2072
2073 /* increase the module use count */
2074 ip_vs_use_count_inc();
2075
14cc3e2b 2076 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
1da177e4
LT
2077 ret = -ERESTARTSYS;
2078 goto out_dec;
2079 }
2080
2081 if (cmd == IP_VS_SO_SET_FLUSH) {
2082 /* Flush the virtual service */
2083 ret = ip_vs_flush();
2084 goto out_unlock;
2085 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2086 /* Set timeout values for (tcp tcpfin udp) */
2087 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
2088 goto out_unlock;
2089 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2090 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2091 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
2092 goto out_unlock;
2093 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2094 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2095 ret = stop_sync_thread(dm->state);
2096 goto out_unlock;
2097 }
2098
c860c6b1
JV
2099 usvc_compat = (struct ip_vs_service_user *)arg;
2100 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2101
2102 /* We only use the new structs internally, so copy userspace compat
2103 * structs to extended internal versions */
2104 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2105 ip_vs_copy_udest_compat(&udest, udest_compat);
1da177e4
LT
2106
2107 if (cmd == IP_VS_SO_SET_ZERO) {
2108 /* if no service address is set, zero counters in all */
c860c6b1 2109 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
1da177e4
LT
2110 ret = ip_vs_zero_all();
2111 goto out_unlock;
2112 }
2113 }
2114
2115 /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
c860c6b1 2116 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) {
1da177e4 2117 IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
c860c6b1
JV
2118 usvc.protocol, NIPQUAD(usvc.addr.ip),
2119 ntohs(usvc.port), usvc.sched_name);
1da177e4
LT
2120 ret = -EFAULT;
2121 goto out_unlock;
2122 }
2123
2124 /* Lookup the exact service by <protocol, addr, port> or fwmark */
c860c6b1 2125 if (usvc.fwmark == 0)
b18610de
JV
2126 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
2127 &usvc.addr, usvc.port);
1da177e4 2128 else
b18610de 2129 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
1da177e4
LT
2130
2131 if (cmd != IP_VS_SO_SET_ADD
c860c6b1 2132 && (svc == NULL || svc->protocol != usvc.protocol)) {
1da177e4
LT
2133 ret = -ESRCH;
2134 goto out_unlock;
2135 }
2136
2137 switch (cmd) {
2138 case IP_VS_SO_SET_ADD:
2139 if (svc != NULL)
2140 ret = -EEXIST;
2141 else
c860c6b1 2142 ret = ip_vs_add_service(&usvc, &svc);
1da177e4
LT
2143 break;
2144 case IP_VS_SO_SET_EDIT:
c860c6b1 2145 ret = ip_vs_edit_service(svc, &usvc);
1da177e4
LT
2146 break;
2147 case IP_VS_SO_SET_DEL:
2148 ret = ip_vs_del_service(svc);
2149 if (!ret)
2150 goto out_unlock;
2151 break;
2152 case IP_VS_SO_SET_ZERO:
2153 ret = ip_vs_zero_service(svc);
2154 break;
2155 case IP_VS_SO_SET_ADDDEST:
c860c6b1 2156 ret = ip_vs_add_dest(svc, &udest);
1da177e4
LT
2157 break;
2158 case IP_VS_SO_SET_EDITDEST:
c860c6b1 2159 ret = ip_vs_edit_dest(svc, &udest);
1da177e4
LT
2160 break;
2161 case IP_VS_SO_SET_DELDEST:
c860c6b1 2162 ret = ip_vs_del_dest(svc, &udest);
1da177e4
LT
2163 break;
2164 default:
2165 ret = -EINVAL;
2166 }
2167
2168 if (svc)
2169 ip_vs_service_put(svc);
2170
2171 out_unlock:
14cc3e2b 2172 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2173 out_dec:
2174 /* decrease the module use count */
2175 ip_vs_use_count_dec();
2176
2177 return ret;
2178}
2179
2180
2181static void
2182ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2183{
2184 spin_lock_bh(&src->lock);
2185 memcpy(dst, src, (char*)&src->lock - (char*)src);
2186 spin_unlock_bh(&src->lock);
2187}
2188
2189static void
2190ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2191{
2192 dst->protocol = src->protocol;
e7ade46a 2193 dst->addr = src->addr.ip;
1da177e4
LT
2194 dst->port = src->port;
2195 dst->fwmark = src->fwmark;
4da62fc7 2196 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
1da177e4
LT
2197 dst->flags = src->flags;
2198 dst->timeout = src->timeout / HZ;
2199 dst->netmask = src->netmask;
2200 dst->num_dests = src->num_dests;
2201 ip_vs_copy_stats(&dst->stats, &src->stats);
2202}
2203
2204static inline int
2205__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2206 struct ip_vs_get_services __user *uptr)
2207{
2208 int idx, count=0;
2209 struct ip_vs_service *svc;
2210 struct ip_vs_service_entry entry;
2211 int ret = 0;
2212
2213 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2214 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2215 if (count >= get->num_services)
2216 goto out;
4da62fc7 2217 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2218 ip_vs_copy_service(&entry, svc);
2219 if (copy_to_user(&uptr->entrytable[count],
2220 &entry, sizeof(entry))) {
2221 ret = -EFAULT;
2222 goto out;
2223 }
2224 count++;
2225 }
2226 }
2227
2228 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2229 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2230 if (count >= get->num_services)
2231 goto out;
4da62fc7 2232 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2233 ip_vs_copy_service(&entry, svc);
2234 if (copy_to_user(&uptr->entrytable[count],
2235 &entry, sizeof(entry))) {
2236 ret = -EFAULT;
2237 goto out;
2238 }
2239 count++;
2240 }
2241 }
2242 out:
2243 return ret;
2244}
2245
2246static inline int
2247__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2248 struct ip_vs_get_dests __user *uptr)
2249{
2250 struct ip_vs_service *svc;
b18610de 2251 union nf_inet_addr addr = { .ip = get->addr };
1da177e4
LT
2252 int ret = 0;
2253
2254 if (get->fwmark)
b18610de 2255 svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark);
1da177e4 2256 else
b18610de
JV
2257 svc = __ip_vs_service_get(AF_INET, get->protocol, &addr,
2258 get->port);
2259
1da177e4
LT
2260 if (svc) {
2261 int count = 0;
2262 struct ip_vs_dest *dest;
2263 struct ip_vs_dest_entry entry;
2264
2265 list_for_each_entry(dest, &svc->destinations, n_list) {
2266 if (count >= get->num_dests)
2267 break;
2268
e7ade46a 2269 entry.addr = dest->addr.ip;
1da177e4
LT
2270 entry.port = dest->port;
2271 entry.conn_flags = atomic_read(&dest->conn_flags);
2272 entry.weight = atomic_read(&dest->weight);
2273 entry.u_threshold = dest->u_threshold;
2274 entry.l_threshold = dest->l_threshold;
2275 entry.activeconns = atomic_read(&dest->activeconns);
2276 entry.inactconns = atomic_read(&dest->inactconns);
2277 entry.persistconns = atomic_read(&dest->persistconns);
2278 ip_vs_copy_stats(&entry.stats, &dest->stats);
2279 if (copy_to_user(&uptr->entrytable[count],
2280 &entry, sizeof(entry))) {
2281 ret = -EFAULT;
2282 break;
2283 }
2284 count++;
2285 }
2286 ip_vs_service_put(svc);
2287 } else
2288 ret = -ESRCH;
2289 return ret;
2290}
2291
2292static inline void
2293__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2294{
2295#ifdef CONFIG_IP_VS_PROTO_TCP
2296 u->tcp_timeout =
2297 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2298 u->tcp_fin_timeout =
2299 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2300#endif
2301#ifdef CONFIG_IP_VS_PROTO_UDP
2302 u->udp_timeout =
2303 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2304#endif
2305}
2306
2307
/*
 * GET_CMDID() turns a getsockopt command number into a zero-based
 * index relative to IP_VS_BASE_CTL.  The argument is parenthesised so
 * that an expression argument is subtracted from as a whole.
 */
#define GET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)

/* Sizes, in bytes, of the argument structure of each get command. */
#define GET_INFO_ARG_LEN	(sizeof(struct ip_vs_getinfo))
#define GET_SERVICES_ARG_LEN	(sizeof(struct ip_vs_get_services))
#define GET_SERVICE_ARG_LEN	(sizeof(struct ip_vs_service_entry))
#define GET_DESTS_ARG_LEN	(sizeof(struct ip_vs_get_dests))
#define GET_TIMEOUT_ARG_LEN	(sizeof(struct ip_vs_timeout_user))
#define GET_DAEMON_ARG_LEN	(sizeof(struct ip_vs_daemon_user) * 2)
2315
9b5b5cff 2316static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
1da177e4
LT
2317 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2318 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2319 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2320 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2321 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2322 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2323 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2324};
2325
2326static int
2327do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2328{
2329 unsigned char arg[128];
2330 int ret = 0;
2331
2332 if (!capable(CAP_NET_ADMIN))
2333 return -EPERM;
2334
2335 if (*len < get_arglen[GET_CMDID(cmd)]) {
2336 IP_VS_ERR("get_ctl: len %u < %u\n",
2337 *len, get_arglen[GET_CMDID(cmd)]);
2338 return -EINVAL;
2339 }
2340
2341 if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0)
2342 return -EFAULT;
2343
14cc3e2b 2344 if (mutex_lock_interruptible(&__ip_vs_mutex))
1da177e4
LT
2345 return -ERESTARTSYS;
2346
2347 switch (cmd) {
2348 case IP_VS_SO_GET_VERSION:
2349 {
2350 char buf[64];
2351
2352 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2353 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
2354 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2355 ret = -EFAULT;
2356 goto out;
2357 }
2358 *len = strlen(buf)+1;
2359 }
2360 break;
2361
2362 case IP_VS_SO_GET_INFO:
2363 {
2364 struct ip_vs_getinfo info;
2365 info.version = IP_VS_VERSION_CODE;
2366 info.size = IP_VS_CONN_TAB_SIZE;
2367 info.num_services = ip_vs_num_services;
2368 if (copy_to_user(user, &info, sizeof(info)) != 0)
2369 ret = -EFAULT;
2370 }
2371 break;
2372
2373 case IP_VS_SO_GET_SERVICES:
2374 {
2375 struct ip_vs_get_services *get;
2376 int size;
2377
2378 get = (struct ip_vs_get_services *)arg;
2379 size = sizeof(*get) +
2380 sizeof(struct ip_vs_service_entry) * get->num_services;
2381 if (*len != size) {
2382 IP_VS_ERR("length: %u != %u\n", *len, size);
2383 ret = -EINVAL;
2384 goto out;
2385 }
2386 ret = __ip_vs_get_service_entries(get, user);
2387 }
2388 break;
2389
2390 case IP_VS_SO_GET_SERVICE:
2391 {
2392 struct ip_vs_service_entry *entry;
2393 struct ip_vs_service *svc;
b18610de 2394 union nf_inet_addr addr;
1da177e4
LT
2395
2396 entry = (struct ip_vs_service_entry *)arg;
b18610de 2397 addr.ip = entry->addr;
1da177e4 2398 if (entry->fwmark)
b18610de 2399 svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark);
1da177e4 2400 else
b18610de
JV
2401 svc = __ip_vs_service_get(AF_INET, entry->protocol,
2402 &addr, entry->port);
1da177e4
LT
2403 if (svc) {
2404 ip_vs_copy_service(entry, svc);
2405 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2406 ret = -EFAULT;
2407 ip_vs_service_put(svc);
2408 } else
2409 ret = -ESRCH;
2410 }
2411 break;
2412
2413 case IP_VS_SO_GET_DESTS:
2414 {
2415 struct ip_vs_get_dests *get;
2416 int size;
2417
2418 get = (struct ip_vs_get_dests *)arg;
2419 size = sizeof(*get) +
2420 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2421 if (*len != size) {
2422 IP_VS_ERR("length: %u != %u\n", *len, size);
2423 ret = -EINVAL;
2424 goto out;
2425 }
2426 ret = __ip_vs_get_dest_entries(get, user);
2427 }
2428 break;
2429
2430 case IP_VS_SO_GET_TIMEOUT:
2431 {
2432 struct ip_vs_timeout_user t;
2433
2434 __ip_vs_get_timeouts(&t);
2435 if (copy_to_user(user, &t, sizeof(t)) != 0)
2436 ret = -EFAULT;
2437 }
2438 break;
2439
2440 case IP_VS_SO_GET_DAEMON:
2441 {
2442 struct ip_vs_daemon_user d[2];
2443
2444 memset(&d, 0, sizeof(d));
2445 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2446 d[0].state = IP_VS_STATE_MASTER;
4da62fc7 2447 strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
1da177e4
LT
2448 d[0].syncid = ip_vs_master_syncid;
2449 }
2450 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2451 d[1].state = IP_VS_STATE_BACKUP;
4da62fc7 2452 strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
1da177e4
LT
2453 d[1].syncid = ip_vs_backup_syncid;
2454 }
2455 if (copy_to_user(user, &d, sizeof(d)) != 0)
2456 ret = -EFAULT;
2457 }
2458 break;
2459
2460 default:
2461 ret = -EINVAL;
2462 }
2463
2464 out:
14cc3e2b 2465 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2466 return ret;
2467}
2468
2469
2470static struct nf_sockopt_ops ip_vs_sockopts = {
2471 .pf = PF_INET,
2472 .set_optmin = IP_VS_BASE_CTL,
2473 .set_optmax = IP_VS_SO_SET_MAX+1,
2474 .set = do_ip_vs_set_ctl,
2475 .get_optmin = IP_VS_BASE_CTL,
2476 .get_optmax = IP_VS_SO_GET_MAX+1,
2477 .get = do_ip_vs_get_ctl,
16fcec35 2478 .owner = THIS_MODULE,
1da177e4
LT
2479};
2480
9a812198
JV
2481/*
2482 * Generic Netlink interface
2483 */
2484
2485/* IPVS genetlink family */
2486static struct genl_family ip_vs_genl_family = {
2487 .id = GENL_ID_GENERATE,
2488 .hdrsize = 0,
2489 .name = IPVS_GENL_NAME,
2490 .version = IPVS_GENL_VERSION,
2491 .maxattr = IPVS_CMD_MAX,
2492};
2493
2494/* Policy used for first-level command attributes */
2495static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2496 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2497 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2498 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2499 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2500 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2501 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2502};
2503
2504/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2505static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2506 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2507 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2508 .len = IP_VS_IFNAME_MAXLEN },
2509 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2510};
2511
2512/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2513static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2514 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2515 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2516 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2517 .len = sizeof(union nf_inet_addr) },
2518 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2519 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2520 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2521 .len = IP_VS_SCHEDNAME_MAXLEN },
2522 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2523 .len = sizeof(struct ip_vs_flags) },
2524 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2525 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2526 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2527};
2528
2529/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2530static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2531 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2532 .len = sizeof(union nf_inet_addr) },
2533 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2534 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2535 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2536 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2537 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2538 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2539 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2540 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2541 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
2542};
2543
2544static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2545 struct ip_vs_stats *stats)
2546{
2547 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2548 if (!nl_stats)
2549 return -EMSGSIZE;
2550
2551 spin_lock_bh(&stats->lock);
2552
2553 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->conns);
2554 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->inpkts);
2555 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->outpkts);
2556 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->inbytes);
2557 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->outbytes);
2558 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->cps);
2559 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->inpps);
2560 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->outpps);
2561 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->inbps);
2562 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->outbps);
2563
2564 spin_unlock_bh(&stats->lock);
2565
2566 nla_nest_end(skb, nl_stats);
2567
2568 return 0;
2569
2570nla_put_failure:
2571 spin_unlock_bh(&stats->lock);
2572 nla_nest_cancel(skb, nl_stats);
2573 return -EMSGSIZE;
2574}
2575
2576static int ip_vs_genl_fill_service(struct sk_buff *skb,
2577 struct ip_vs_service *svc)
2578{
2579 struct nlattr *nl_service;
2580 struct ip_vs_flags flags = { .flags = svc->flags,
2581 .mask = ~0 };
2582
2583 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2584 if (!nl_service)
2585 return -EMSGSIZE;
2586
2587 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, AF_INET);
2588
2589 if (svc->fwmark) {
2590 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2591 } else {
2592 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2593 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2594 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2595 }
2596
2597 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2598 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2599 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2600 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2601
2602 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2603 goto nla_put_failure;
2604
2605 nla_nest_end(skb, nl_service);
2606
2607 return 0;
2608
2609nla_put_failure:
2610 nla_nest_cancel(skb, nl_service);
2611 return -EMSGSIZE;
2612}
2613
2614static int ip_vs_genl_dump_service(struct sk_buff *skb,
2615 struct ip_vs_service *svc,
2616 struct netlink_callback *cb)
2617{
2618 void *hdr;
2619
2620 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2621 &ip_vs_genl_family, NLM_F_MULTI,
2622 IPVS_CMD_NEW_SERVICE);
2623 if (!hdr)
2624 return -EMSGSIZE;
2625
2626 if (ip_vs_genl_fill_service(skb, svc) < 0)
2627 goto nla_put_failure;
2628
2629 return genlmsg_end(skb, hdr);
2630
2631nla_put_failure:
2632 genlmsg_cancel(skb, hdr);
2633 return -EMSGSIZE;
2634}
2635
2636static int ip_vs_genl_dump_services(struct sk_buff *skb,
2637 struct netlink_callback *cb)
2638{
2639 int idx = 0, i;
2640 int start = cb->args[0];
2641 struct ip_vs_service *svc;
2642
2643 mutex_lock(&__ip_vs_mutex);
2644 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2645 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2646 if (++idx <= start)
2647 continue;
2648 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2649 idx--;
2650 goto nla_put_failure;
2651 }
2652 }
2653 }
2654
2655 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2656 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2657 if (++idx <= start)
2658 continue;
2659 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2660 idx--;
2661 goto nla_put_failure;
2662 }
2663 }
2664 }
2665
2666nla_put_failure:
2667 mutex_unlock(&__ip_vs_mutex);
2668 cb->args[0] = idx;
2669
2670 return skb->len;
2671}
2672
c860c6b1 2673static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
9a812198
JV
2674 struct nlattr *nla, int full_entry)
2675{
2676 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2677 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2678
2679 /* Parse mandatory identifying service fields first */
2680 if (nla == NULL ||
2681 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2682 return -EINVAL;
2683
2684 nla_af = attrs[IPVS_SVC_ATTR_AF];
2685 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2686 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2687 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2688 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2689
2690 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2691 return -EINVAL;
2692
c860c6b1 2693 usvc->af = nla_get_u16(nla_af);
9a812198
JV
2694 /* For now, only support IPv4 */
2695 if (nla_get_u16(nla_af) != AF_INET)
2696 return -EAFNOSUPPORT;
2697
2698 if (nla_fwmark) {
2699 usvc->protocol = IPPROTO_TCP;
2700 usvc->fwmark = nla_get_u32(nla_fwmark);
2701 } else {
2702 usvc->protocol = nla_get_u16(nla_protocol);
2703 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2704 usvc->port = nla_get_u16(nla_port);
2705 usvc->fwmark = 0;
2706 }
2707
2708 /* If a full entry was requested, check for the additional fields */
2709 if (full_entry) {
2710 struct nlattr *nla_sched, *nla_flags, *nla_timeout,
2711 *nla_netmask;
2712 struct ip_vs_flags flags;
2713 struct ip_vs_service *svc;
2714
2715 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2716 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2717 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2718 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2719
2720 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2721 return -EINVAL;
2722
2723 nla_memcpy(&flags, nla_flags, sizeof(flags));
2724
2725 /* prefill flags from service if it already exists */
2726 if (usvc->fwmark)
b18610de 2727 svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark);
9a812198 2728 else
b18610de
JV
2729 svc = __ip_vs_service_get(usvc->af, usvc->protocol,
2730 &usvc->addr, usvc->port);
9a812198
JV
2731 if (svc) {
2732 usvc->flags = svc->flags;
2733 ip_vs_service_put(svc);
2734 } else
2735 usvc->flags = 0;
2736
2737 /* set new flags from userland */
2738 usvc->flags = (usvc->flags & ~flags.mask) |
2739 (flags.flags & flags.mask);
c860c6b1 2740 usvc->sched_name = nla_data(nla_sched);
9a812198
JV
2741 usvc->timeout = nla_get_u32(nla_timeout);
2742 usvc->netmask = nla_get_u32(nla_netmask);
2743 }
2744
2745 return 0;
2746}
2747
2748static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2749{
c860c6b1 2750 struct ip_vs_service_user_kern usvc;
9a812198
JV
2751 int ret;
2752
2753 ret = ip_vs_genl_parse_service(&usvc, nla, 0);
2754 if (ret)
2755 return ERR_PTR(ret);
2756
2757 if (usvc.fwmark)
b18610de 2758 return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
9a812198 2759 else
b18610de
JV
2760 return __ip_vs_service_get(usvc.af, usvc.protocol,
2761 &usvc.addr, usvc.port);
9a812198
JV
2762}
2763
2764static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2765{
2766 struct nlattr *nl_dest;
2767
2768 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2769 if (!nl_dest)
2770 return -EMSGSIZE;
2771
2772 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2773 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2774
2775 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2776 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2777 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2778 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2779 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2780 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2781 atomic_read(&dest->activeconns));
2782 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2783 atomic_read(&dest->inactconns));
2784 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2785 atomic_read(&dest->persistconns));
2786
2787 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2788 goto nla_put_failure;
2789
2790 nla_nest_end(skb, nl_dest);
2791
2792 return 0;
2793
2794nla_put_failure:
2795 nla_nest_cancel(skb, nl_dest);
2796 return -EMSGSIZE;
2797}
2798
2799static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2800 struct netlink_callback *cb)
2801{
2802 void *hdr;
2803
2804 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2805 &ip_vs_genl_family, NLM_F_MULTI,
2806 IPVS_CMD_NEW_DEST);
2807 if (!hdr)
2808 return -EMSGSIZE;
2809
2810 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2811 goto nla_put_failure;
2812
2813 return genlmsg_end(skb, hdr);
2814
2815nla_put_failure:
2816 genlmsg_cancel(skb, hdr);
2817 return -EMSGSIZE;
2818}
2819
2820static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2821 struct netlink_callback *cb)
2822{
2823 int idx = 0;
2824 int start = cb->args[0];
2825 struct ip_vs_service *svc;
2826 struct ip_vs_dest *dest;
2827 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2828
2829 mutex_lock(&__ip_vs_mutex);
2830
2831 /* Try to find the service for which to dump destinations */
2832 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2833 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2834 goto out_err;
2835
2836 svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
2837 if (IS_ERR(svc) || svc == NULL)
2838 goto out_err;
2839
2840 /* Dump the destinations */
2841 list_for_each_entry(dest, &svc->destinations, n_list) {
2842 if (++idx <= start)
2843 continue;
2844 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2845 idx--;
2846 goto nla_put_failure;
2847 }
2848 }
2849
2850nla_put_failure:
2851 cb->args[0] = idx;
2852 ip_vs_service_put(svc);
2853
2854out_err:
2855 mutex_unlock(&__ip_vs_mutex);
2856
2857 return skb->len;
2858}
2859
c860c6b1 2860static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
9a812198
JV
2861 struct nlattr *nla, int full_entry)
2862{
2863 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2864 struct nlattr *nla_addr, *nla_port;
2865
2866 /* Parse mandatory identifying destination fields first */
2867 if (nla == NULL ||
2868 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2869 return -EINVAL;
2870
2871 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
2872 nla_port = attrs[IPVS_DEST_ATTR_PORT];
2873
2874 if (!(nla_addr && nla_port))
2875 return -EINVAL;
2876
2877 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2878 udest->port = nla_get_u16(nla_port);
2879
2880 /* If a full entry was requested, check for the additional fields */
2881 if (full_entry) {
2882 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2883 *nla_l_thresh;
2884
2885 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
2886 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
2887 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
2888 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
2889
2890 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
2891 return -EINVAL;
2892
2893 udest->conn_flags = nla_get_u32(nla_fwd)
2894 & IP_VS_CONN_F_FWD_MASK;
2895 udest->weight = nla_get_u32(nla_weight);
2896 udest->u_threshold = nla_get_u32(nla_u_thresh);
2897 udest->l_threshold = nla_get_u32(nla_l_thresh);
2898 }
2899
2900 return 0;
2901}
2902
2903static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
2904 const char *mcast_ifn, __be32 syncid)
2905{
2906 struct nlattr *nl_daemon;
2907
2908 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
2909 if (!nl_daemon)
2910 return -EMSGSIZE;
2911
2912 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
2913 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
2914 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
2915
2916 nla_nest_end(skb, nl_daemon);
2917
2918 return 0;
2919
2920nla_put_failure:
2921 nla_nest_cancel(skb, nl_daemon);
2922 return -EMSGSIZE;
2923}
2924
2925static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
2926 const char *mcast_ifn, __be32 syncid,
2927 struct netlink_callback *cb)
2928{
2929 void *hdr;
2930 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2931 &ip_vs_genl_family, NLM_F_MULTI,
2932 IPVS_CMD_NEW_DAEMON);
2933 if (!hdr)
2934 return -EMSGSIZE;
2935
2936 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
2937 goto nla_put_failure;
2938
2939 return genlmsg_end(skb, hdr);
2940
2941nla_put_failure:
2942 genlmsg_cancel(skb, hdr);
2943 return -EMSGSIZE;
2944}
2945
2946static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
2947 struct netlink_callback *cb)
2948{
2949 mutex_lock(&__ip_vs_mutex);
2950 if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
2951 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
2952 ip_vs_master_mcast_ifn,
2953 ip_vs_master_syncid, cb) < 0)
2954 goto nla_put_failure;
2955
2956 cb->args[0] = 1;
2957 }
2958
2959 if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
2960 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
2961 ip_vs_backup_mcast_ifn,
2962 ip_vs_backup_syncid, cb) < 0)
2963 goto nla_put_failure;
2964
2965 cb->args[1] = 1;
2966 }
2967
2968nla_put_failure:
2969 mutex_unlock(&__ip_vs_mutex);
2970
2971 return skb->len;
2972}
2973
2974static int ip_vs_genl_new_daemon(struct nlattr **attrs)
2975{
2976 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
2977 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
2978 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
2979 return -EINVAL;
2980
2981 return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
2982 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
2983 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
2984}
2985
2986static int ip_vs_genl_del_daemon(struct nlattr **attrs)
2987{
2988 if (!attrs[IPVS_DAEMON_ATTR_STATE])
2989 return -EINVAL;
2990
2991 return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
2992}
2993
2994static int ip_vs_genl_set_config(struct nlattr **attrs)
2995{
2996 struct ip_vs_timeout_user t;
2997
2998 __ip_vs_get_timeouts(&t);
2999
3000 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3001 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3002
3003 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3004 t.tcp_fin_timeout =
3005 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3006
3007 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3008 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3009
3010 return ip_vs_set_timeout(&t);
3011}
3012
3013static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3014{
3015 struct ip_vs_service *svc = NULL;
c860c6b1
JV
3016 struct ip_vs_service_user_kern usvc;
3017 struct ip_vs_dest_user_kern udest;
9a812198
JV
3018 int ret = 0, cmd;
3019 int need_full_svc = 0, need_full_dest = 0;
3020
3021 cmd = info->genlhdr->cmd;
3022
3023 mutex_lock(&__ip_vs_mutex);
3024
3025 if (cmd == IPVS_CMD_FLUSH) {
3026 ret = ip_vs_flush();
3027 goto out;
3028 } else if (cmd == IPVS_CMD_SET_CONFIG) {
3029 ret = ip_vs_genl_set_config(info->attrs);
3030 goto out;
3031 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3032 cmd == IPVS_CMD_DEL_DAEMON) {
3033
3034 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3035
3036 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3037 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3038 info->attrs[IPVS_CMD_ATTR_DAEMON],
3039 ip_vs_daemon_policy)) {
3040 ret = -EINVAL;
3041 goto out;
3042 }
3043
3044 if (cmd == IPVS_CMD_NEW_DAEMON)
3045 ret = ip_vs_genl_new_daemon(daemon_attrs);
3046 else
3047 ret = ip_vs_genl_del_daemon(daemon_attrs);
3048 goto out;
3049 } else if (cmd == IPVS_CMD_ZERO &&
3050 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3051 ret = ip_vs_zero_all();
3052 goto out;
3053 }
3054
3055 /* All following commands require a service argument, so check if we
3056 * received a valid one. We need a full service specification when
3057 * adding / editing a service. Only identifying members otherwise. */
3058 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3059 need_full_svc = 1;
3060
3061 ret = ip_vs_genl_parse_service(&usvc,
3062 info->attrs[IPVS_CMD_ATTR_SERVICE],
3063 need_full_svc);
3064 if (ret)
3065 goto out;
3066
3067 /* Lookup the exact service by <protocol, addr, port> or fwmark */
3068 if (usvc.fwmark == 0)
b18610de
JV
3069 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
3070 &usvc.addr, usvc.port);
9a812198 3071 else
b18610de 3072 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
9a812198
JV
3073
3074 /* Unless we're adding a new service, the service must already exist */
3075 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3076 ret = -ESRCH;
3077 goto out;
3078 }
3079
3080 /* Destination commands require a valid destination argument. For
3081 * adding / editing a destination, we need a full destination
3082 * specification. */
3083 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3084 cmd == IPVS_CMD_DEL_DEST) {
3085 if (cmd != IPVS_CMD_DEL_DEST)
3086 need_full_dest = 1;
3087
3088 ret = ip_vs_genl_parse_dest(&udest,
3089 info->attrs[IPVS_CMD_ATTR_DEST],
3090 need_full_dest);
3091 if (ret)
3092 goto out;
3093 }
3094
3095 switch (cmd) {
3096 case IPVS_CMD_NEW_SERVICE:
3097 if (svc == NULL)
3098 ret = ip_vs_add_service(&usvc, &svc);
3099 else
3100 ret = -EEXIST;
3101 break;
3102 case IPVS_CMD_SET_SERVICE:
3103 ret = ip_vs_edit_service(svc, &usvc);
3104 break;
3105 case IPVS_CMD_DEL_SERVICE:
3106 ret = ip_vs_del_service(svc);
3107 break;
3108 case IPVS_CMD_NEW_DEST:
3109 ret = ip_vs_add_dest(svc, &udest);
3110 break;
3111 case IPVS_CMD_SET_DEST:
3112 ret = ip_vs_edit_dest(svc, &udest);
3113 break;
3114 case IPVS_CMD_DEL_DEST:
3115 ret = ip_vs_del_dest(svc, &udest);
3116 break;
3117 case IPVS_CMD_ZERO:
3118 ret = ip_vs_zero_service(svc);
3119 break;
3120 default:
3121 ret = -EINVAL;
3122 }
3123
3124out:
3125 if (svc)
3126 ip_vs_service_put(svc);
3127 mutex_unlock(&__ip_vs_mutex);
3128
3129 return ret;
3130}
3131
3132static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3133{
3134 struct sk_buff *msg;
3135 void *reply;
3136 int ret, cmd, reply_cmd;
3137
3138 cmd = info->genlhdr->cmd;
3139
3140 if (cmd == IPVS_CMD_GET_SERVICE)
3141 reply_cmd = IPVS_CMD_NEW_SERVICE;
3142 else if (cmd == IPVS_CMD_GET_INFO)
3143 reply_cmd = IPVS_CMD_SET_INFO;
3144 else if (cmd == IPVS_CMD_GET_CONFIG)
3145 reply_cmd = IPVS_CMD_SET_CONFIG;
3146 else {
3147 IP_VS_ERR("unknown Generic Netlink command\n");
3148 return -EINVAL;
3149 }
3150
3151 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3152 if (!msg)
3153 return -ENOMEM;
3154
3155 mutex_lock(&__ip_vs_mutex);
3156
3157 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3158 if (reply == NULL)
3159 goto nla_put_failure;
3160
3161 switch (cmd) {
3162 case IPVS_CMD_GET_SERVICE:
3163 {
3164 struct ip_vs_service *svc;
3165
3166 svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
3167 if (IS_ERR(svc)) {
3168 ret = PTR_ERR(svc);
3169 goto out_err;
3170 } else if (svc) {
3171 ret = ip_vs_genl_fill_service(msg, svc);
3172 ip_vs_service_put(svc);
3173 if (ret)
3174 goto nla_put_failure;
3175 } else {
3176 ret = -ESRCH;
3177 goto out_err;
3178 }
3179
3180 break;
3181 }
3182
3183 case IPVS_CMD_GET_CONFIG:
3184 {
3185 struct ip_vs_timeout_user t;
3186
3187 __ip_vs_get_timeouts(&t);
3188#ifdef CONFIG_IP_VS_PROTO_TCP
3189 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3190 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3191 t.tcp_fin_timeout);
3192#endif
3193#ifdef CONFIG_IP_VS_PROTO_UDP
3194 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3195#endif
3196
3197 break;
3198 }
3199
3200 case IPVS_CMD_GET_INFO:
3201 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3202 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3203 IP_VS_CONN_TAB_SIZE);
3204 break;
3205 }
3206
3207 genlmsg_end(msg, reply);
3208 ret = genlmsg_unicast(msg, info->snd_pid);
3209 goto out;
3210
3211nla_put_failure:
3212 IP_VS_ERR("not enough space in Netlink message\n");
3213 ret = -EMSGSIZE;
3214
3215out_err:
3216 nlmsg_free(msg);
3217out:
3218 mutex_unlock(&__ip_vs_mutex);
3219
3220 return ret;
3221}
3222
3223
3224static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3225 {
3226 .cmd = IPVS_CMD_NEW_SERVICE,
3227 .flags = GENL_ADMIN_PERM,
3228 .policy = ip_vs_cmd_policy,
3229 .doit = ip_vs_genl_set_cmd,
3230 },
3231 {
3232 .cmd = IPVS_CMD_SET_SERVICE,
3233 .flags = GENL_ADMIN_PERM,
3234 .policy = ip_vs_cmd_policy,
3235 .doit = ip_vs_genl_set_cmd,
3236 },
3237 {
3238 .cmd = IPVS_CMD_DEL_SERVICE,
3239 .flags = GENL_ADMIN_PERM,
3240 .policy = ip_vs_cmd_policy,
3241 .doit = ip_vs_genl_set_cmd,
3242 },
3243 {
3244 .cmd = IPVS_CMD_GET_SERVICE,
3245 .flags = GENL_ADMIN_PERM,
3246 .doit = ip_vs_genl_get_cmd,
3247 .dumpit = ip_vs_genl_dump_services,
3248 .policy = ip_vs_cmd_policy,
3249 },
3250 {
3251 .cmd = IPVS_CMD_NEW_DEST,
3252 .flags = GENL_ADMIN_PERM,
3253 .policy = ip_vs_cmd_policy,
3254 .doit = ip_vs_genl_set_cmd,
3255 },
3256 {
3257 .cmd = IPVS_CMD_SET_DEST,
3258 .flags = GENL_ADMIN_PERM,
3259 .policy = ip_vs_cmd_policy,
3260 .doit = ip_vs_genl_set_cmd,
3261 },
3262 {
3263 .cmd = IPVS_CMD_DEL_DEST,
3264 .flags = GENL_ADMIN_PERM,
3265 .policy = ip_vs_cmd_policy,
3266 .doit = ip_vs_genl_set_cmd,
3267 },
3268 {
3269 .cmd = IPVS_CMD_GET_DEST,
3270 .flags = GENL_ADMIN_PERM,
3271 .policy = ip_vs_cmd_policy,
3272 .dumpit = ip_vs_genl_dump_dests,
3273 },
3274 {
3275 .cmd = IPVS_CMD_NEW_DAEMON,
3276 .flags = GENL_ADMIN_PERM,
3277 .policy = ip_vs_cmd_policy,
3278 .doit = ip_vs_genl_set_cmd,
3279 },
3280 {
3281 .cmd = IPVS_CMD_DEL_DAEMON,
3282 .flags = GENL_ADMIN_PERM,
3283 .policy = ip_vs_cmd_policy,
3284 .doit = ip_vs_genl_set_cmd,
3285 },
3286 {
3287 .cmd = IPVS_CMD_GET_DAEMON,
3288 .flags = GENL_ADMIN_PERM,
3289 .dumpit = ip_vs_genl_dump_daemons,
3290 },
3291 {
3292 .cmd = IPVS_CMD_SET_CONFIG,
3293 .flags = GENL_ADMIN_PERM,
3294 .policy = ip_vs_cmd_policy,
3295 .doit = ip_vs_genl_set_cmd,
3296 },
3297 {
3298 .cmd = IPVS_CMD_GET_CONFIG,
3299 .flags = GENL_ADMIN_PERM,
3300 .doit = ip_vs_genl_get_cmd,
3301 },
3302 {
3303 .cmd = IPVS_CMD_GET_INFO,
3304 .flags = GENL_ADMIN_PERM,
3305 .doit = ip_vs_genl_get_cmd,
3306 },
3307 {
3308 .cmd = IPVS_CMD_ZERO,
3309 .flags = GENL_ADMIN_PERM,
3310 .policy = ip_vs_cmd_policy,
3311 .doit = ip_vs_genl_set_cmd,
3312 },
3313 {
3314 .cmd = IPVS_CMD_FLUSH,
3315 .flags = GENL_ADMIN_PERM,
3316 .doit = ip_vs_genl_set_cmd,
3317 },
3318};
3319
3320static int __init ip_vs_genl_register(void)
3321{
3322 int ret, i;
3323
3324 ret = genl_register_family(&ip_vs_genl_family);
3325 if (ret)
3326 return ret;
3327
3328 for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
3329 ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
3330 if (ret)
3331 goto err_out;
3332 }
3333 return 0;
3334
3335err_out:
3336 genl_unregister_family(&ip_vs_genl_family);
3337 return ret;
3338}
3339
3340static void ip_vs_genl_unregister(void)
3341{
3342 genl_unregister_family(&ip_vs_genl_family);
3343}
3344
3345/* End of Generic Netlink interface definitions */
3346
1da177e4 3347
048cf48b 3348int __init ip_vs_control_init(void)
1da177e4
LT
3349{
3350 int ret;
3351 int idx;
3352
3353 EnterFunction(2);
3354
3355 ret = nf_register_sockopt(&ip_vs_sockopts);
3356 if (ret) {
3357 IP_VS_ERR("cannot register sockopt.\n");
3358 return ret;
3359 }
3360
9a812198
JV
3361 ret = ip_vs_genl_register();
3362 if (ret) {
3363 IP_VS_ERR("cannot register Generic Netlink interface.\n");
3364 nf_unregister_sockopt(&ip_vs_sockopts);
3365 return ret;
3366 }
3367
457c4cbc
EB
3368 proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
3369 proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
1da177e4 3370
90754f8e 3371 sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
1da177e4
LT
3372
3373 /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3374 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3375 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3376 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3377 }
3378 for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) {
3379 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3380 }
3381
1da177e4
LT
3382 ip_vs_new_estimator(&ip_vs_stats);
3383
3384 /* Hook the defense timer */
3385 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3386
3387 LeaveFunction(2);
3388 return 0;
3389}
3390
3391
3392void ip_vs_control_cleanup(void)
3393{
3394 EnterFunction(2);
3395 ip_vs_trash_cleanup();
3396 cancel_rearming_delayed_work(&defense_work);
28e53bdd 3397 cancel_work_sync(&defense_work.work);
1da177e4
LT
3398 ip_vs_kill_estimator(&ip_vs_stats);
3399 unregister_sysctl_table(sysctl_header);
457c4cbc
EB
3400 proc_net_remove(&init_net, "ip_vs_stats");
3401 proc_net_remove(&init_net, "ip_vs");
9a812198 3402 ip_vs_genl_unregister();
1da177e4
LT
3403 nf_unregister_sockopt(&ip_vs_sockopts);
3404 LeaveFunction(2);
3405}