]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/ipvs/ip_vs_ctl.c
IPVS: Add function to determine if IPv6 address is local
[net-next-2.6.git] / net / ipv4 / ipvs / ip_vs_ctl.c
CommitLineData
1da177e4
LT
1/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
1da177e4
LT
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
21#include <linux/module.h>
22#include <linux/init.h>
23#include <linux/types.h>
4fc268d2 24#include <linux/capability.h>
1da177e4
LT
25#include <linux/fs.h>
26#include <linux/sysctl.h>
27#include <linux/proc_fs.h>
28#include <linux/workqueue.h>
29#include <linux/swap.h>
1da177e4
LT
30#include <linux/seq_file.h>
31
32#include <linux/netfilter.h>
33#include <linux/netfilter_ipv4.h>
14cc3e2b 34#include <linux/mutex.h>
1da177e4 35
457c4cbc 36#include <net/net_namespace.h>
1da177e4 37#include <net/ip.h>
09571c7a
VB
38#ifdef CONFIG_IP_VS_IPV6
39#include <net/ipv6.h>
40#include <net/ip6_route.h>
41#endif
14c85021 42#include <net/route.h>
1da177e4 43#include <net/sock.h>
9a812198 44#include <net/genetlink.h>
1da177e4
LT
45
46#include <asm/uaccess.h>
47
48#include <net/ip_vs.h>
49
50/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
14cc3e2b 51static DEFINE_MUTEX(__ip_vs_mutex);
1da177e4
LT
52
53/* lock for service table */
54static DEFINE_RWLOCK(__ip_vs_svc_lock);
55
56/* lock for table with the real services */
57static DEFINE_RWLOCK(__ip_vs_rs_lock);
58
59/* lock for state and timeout tables */
60static DEFINE_RWLOCK(__ip_vs_securetcp_lock);
61
62/* lock for drop entry handling */
63static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
64
65/* lock for drop packet handling */
66static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
67
68/* 1/rate drop and drop-entry variables */
69int ip_vs_drop_rate = 0;
70int ip_vs_drop_counter = 0;
71static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
72
73/* number of virtual services */
74static int ip_vs_num_services = 0;
75
76/* sysctl variables */
77static int sysctl_ip_vs_drop_entry = 0;
78static int sysctl_ip_vs_drop_packet = 0;
79static int sysctl_ip_vs_secure_tcp = 0;
80static int sysctl_ip_vs_amemthresh = 1024;
81static int sysctl_ip_vs_am_droprate = 10;
82int sysctl_ip_vs_cache_bypass = 0;
83int sysctl_ip_vs_expire_nodest_conn = 0;
84int sysctl_ip_vs_expire_quiescent_template = 0;
85int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
86int sysctl_ip_vs_nat_icmp_send = 0;
87
88
89#ifdef CONFIG_IP_VS_DEBUG
90static int sysctl_ip_vs_debug_level = 0;
91
92int ip_vs_get_debug_level(void)
93{
94 return sysctl_ip_vs_debug_level;
95}
96#endif
97
09571c7a
VB
98#ifdef CONFIG_IP_VS_IPV6
99/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
100static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
101{
102 struct rt6_info *rt;
103 struct flowi fl = {
104 .oif = 0,
105 .nl_u = {
106 .ip6_u = {
107 .daddr = *addr,
108 .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
109 };
110
111 rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
112 if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
113 return 1;
114
115 return 0;
116}
117#endif
1da177e4 118/*
af9debd4
JA
119 * update_defense_level is called from keventd and from sysctl,
120 * so it needs to protect itself from softirqs
1da177e4
LT
121 */
122static void update_defense_level(void)
123{
124 struct sysinfo i;
125 static int old_secure_tcp = 0;
126 int availmem;
127 int nomem;
128 int to_change = -1;
129
130 /* we only count free and buffered memory (in pages) */
131 si_meminfo(&i);
132 availmem = i.freeram + i.bufferram;
133 /* however in linux 2.5 the i.bufferram is total page cache size,
134 we need adjust it */
135 /* si_swapinfo(&i); */
136 /* availmem = availmem - (i.totalswap - i.freeswap); */
137
138 nomem = (availmem < sysctl_ip_vs_amemthresh);
139
af9debd4
JA
140 local_bh_disable();
141
1da177e4
LT
142 /* drop_entry */
143 spin_lock(&__ip_vs_dropentry_lock);
144 switch (sysctl_ip_vs_drop_entry) {
145 case 0:
146 atomic_set(&ip_vs_dropentry, 0);
147 break;
148 case 1:
149 if (nomem) {
150 atomic_set(&ip_vs_dropentry, 1);
151 sysctl_ip_vs_drop_entry = 2;
152 } else {
153 atomic_set(&ip_vs_dropentry, 0);
154 }
155 break;
156 case 2:
157 if (nomem) {
158 atomic_set(&ip_vs_dropentry, 1);
159 } else {
160 atomic_set(&ip_vs_dropentry, 0);
161 sysctl_ip_vs_drop_entry = 1;
162 };
163 break;
164 case 3:
165 atomic_set(&ip_vs_dropentry, 1);
166 break;
167 }
168 spin_unlock(&__ip_vs_dropentry_lock);
169
170 /* drop_packet */
171 spin_lock(&__ip_vs_droppacket_lock);
172 switch (sysctl_ip_vs_drop_packet) {
173 case 0:
174 ip_vs_drop_rate = 0;
175 break;
176 case 1:
177 if (nomem) {
178 ip_vs_drop_rate = ip_vs_drop_counter
179 = sysctl_ip_vs_amemthresh /
180 (sysctl_ip_vs_amemthresh-availmem);
181 sysctl_ip_vs_drop_packet = 2;
182 } else {
183 ip_vs_drop_rate = 0;
184 }
185 break;
186 case 2:
187 if (nomem) {
188 ip_vs_drop_rate = ip_vs_drop_counter
189 = sysctl_ip_vs_amemthresh /
190 (sysctl_ip_vs_amemthresh-availmem);
191 } else {
192 ip_vs_drop_rate = 0;
193 sysctl_ip_vs_drop_packet = 1;
194 }
195 break;
196 case 3:
197 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
198 break;
199 }
200 spin_unlock(&__ip_vs_droppacket_lock);
201
202 /* secure_tcp */
203 write_lock(&__ip_vs_securetcp_lock);
204 switch (sysctl_ip_vs_secure_tcp) {
205 case 0:
206 if (old_secure_tcp >= 2)
207 to_change = 0;
208 break;
209 case 1:
210 if (nomem) {
211 if (old_secure_tcp < 2)
212 to_change = 1;
213 sysctl_ip_vs_secure_tcp = 2;
214 } else {
215 if (old_secure_tcp >= 2)
216 to_change = 0;
217 }
218 break;
219 case 2:
220 if (nomem) {
221 if (old_secure_tcp < 2)
222 to_change = 1;
223 } else {
224 if (old_secure_tcp >= 2)
225 to_change = 0;
226 sysctl_ip_vs_secure_tcp = 1;
227 }
228 break;
229 case 3:
230 if (old_secure_tcp < 2)
231 to_change = 1;
232 break;
233 }
234 old_secure_tcp = sysctl_ip_vs_secure_tcp;
235 if (to_change >= 0)
236 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
237 write_unlock(&__ip_vs_securetcp_lock);
af9debd4
JA
238
239 local_bh_enable();
1da177e4
LT
240}
241
242
243/*
244 * Timer for checking the defense
245 */
246#define DEFENSE_TIMER_PERIOD 1*HZ
c4028958
DH
247static void defense_work_handler(struct work_struct *work);
248static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
1da177e4 249
c4028958 250static void defense_work_handler(struct work_struct *work)
1da177e4
LT
251{
252 update_defense_level();
253 if (atomic_read(&ip_vs_dropentry))
254 ip_vs_random_dropentry();
255
256 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
257}
258
259int
260ip_vs_use_count_inc(void)
261{
262 return try_module_get(THIS_MODULE);
263}
264
265void
266ip_vs_use_count_dec(void)
267{
268 module_put(THIS_MODULE);
269}
270
271
272/*
273 * Hash table: for virtual service lookups
274 */
275#define IP_VS_SVC_TAB_BITS 8
276#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
277#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
278
279/* the service table hashed by <protocol, addr, port> */
280static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
281/* the service table hashed by fwmark */
282static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
283
284/*
285 * Hash table: for real service lookups
286 */
287#define IP_VS_RTAB_BITS 4
288#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
289#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
290
291static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
292
293/*
294 * Trash for destinations
295 */
296static LIST_HEAD(ip_vs_dest_trash);
297
298/*
299 * FTP & NULL virtual service counters
300 */
301static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
302static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
303
304
305/*
306 * Returns hash value for virtual service
307 */
308static __inline__ unsigned
b18610de
JV
309ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
310 __be16 port)
1da177e4
LT
311{
312 register unsigned porth = ntohs(port);
b18610de 313 __be32 addr_fold = addr->ip;
1da177e4 314
b18610de
JV
315#ifdef CONFIG_IP_VS_IPV6
316 if (af == AF_INET6)
317 addr_fold = addr->ip6[0]^addr->ip6[1]^
318 addr->ip6[2]^addr->ip6[3];
319#endif
320
321 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
1da177e4
LT
322 & IP_VS_SVC_TAB_MASK;
323}
324
325/*
326 * Returns hash value of fwmark for virtual service lookup
327 */
328static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
329{
330 return fwmark & IP_VS_SVC_TAB_MASK;
331}
332
333/*
334 * Hashes a service in the ip_vs_svc_table by <proto,addr,port>
335 * or in the ip_vs_svc_fwm_table by fwmark.
336 * Should be called with locked tables.
337 */
338static int ip_vs_svc_hash(struct ip_vs_service *svc)
339{
340 unsigned hash;
341
342 if (svc->flags & IP_VS_SVC_F_HASHED) {
343 IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
344 "called from %p\n", __builtin_return_address(0));
345 return 0;
346 }
347
348 if (svc->fwmark == 0) {
349 /*
350 * Hash it by <protocol,addr,port> in ip_vs_svc_table
351 */
b18610de 352 hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
e7ade46a 353 svc->port);
1da177e4
LT
354 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
355 } else {
356 /*
357 * Hash it by fwmark in ip_vs_svc_fwm_table
358 */
359 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
360 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
361 }
362
363 svc->flags |= IP_VS_SVC_F_HASHED;
364 /* increase its refcnt because it is referenced by the svc table */
365 atomic_inc(&svc->refcnt);
366 return 1;
367}
368
369
370/*
371 * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
372 * Should be called with locked tables.
373 */
374static int ip_vs_svc_unhash(struct ip_vs_service *svc)
375{
376 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
377 IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
378 "called from %p\n", __builtin_return_address(0));
379 return 0;
380 }
381
382 if (svc->fwmark == 0) {
383 /* Remove it from the ip_vs_svc_table table */
384 list_del(&svc->s_list);
385 } else {
386 /* Remove it from the ip_vs_svc_fwm_table table */
387 list_del(&svc->f_list);
388 }
389
390 svc->flags &= ~IP_VS_SVC_F_HASHED;
391 atomic_dec(&svc->refcnt);
392 return 1;
393}
394
395
396/*
397 * Get service by {proto,addr,port} in the service table.
398 */
b18610de
JV
399static inline struct ip_vs_service *
400__ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
401 __be16 vport)
1da177e4
LT
402{
403 unsigned hash;
404 struct ip_vs_service *svc;
405
406 /* Check for "full" addressed entries */
b18610de 407 hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
1da177e4
LT
408
409 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
b18610de
JV
410 if ((svc->af == af)
411 && ip_vs_addr_equal(af, &svc->addr, vaddr)
1da177e4
LT
412 && (svc->port == vport)
413 && (svc->protocol == protocol)) {
414 /* HIT */
415 atomic_inc(&svc->usecnt);
416 return svc;
417 }
418 }
419
420 return NULL;
421}
422
423
424/*
425 * Get service by {fwmark} in the service table.
426 */
b18610de
JV
427static inline struct ip_vs_service *
428__ip_vs_svc_fwm_get(int af, __u32 fwmark)
1da177e4
LT
429{
430 unsigned hash;
431 struct ip_vs_service *svc;
432
433 /* Check for fwmark addressed entries */
434 hash = ip_vs_svc_fwm_hashkey(fwmark);
435
436 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
b18610de 437 if (svc->fwmark == fwmark && svc->af == af) {
1da177e4
LT
438 /* HIT */
439 atomic_inc(&svc->usecnt);
440 return svc;
441 }
442 }
443
444 return NULL;
445}
446
447struct ip_vs_service *
3c2e0505
JV
448ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
449 const union nf_inet_addr *vaddr, __be16 vport)
1da177e4
LT
450{
451 struct ip_vs_service *svc;
3c2e0505 452
1da177e4
LT
453 read_lock(&__ip_vs_svc_lock);
454
455 /*
456 * Check the table hashed by fwmark first
457 */
3c2e0505 458 if (fwmark && (svc = __ip_vs_svc_fwm_get(af, fwmark)))
1da177e4
LT
459 goto out;
460
461 /*
462 * Check the table hashed by <protocol,addr,port>
463 * for "full" addressed entries
464 */
3c2e0505 465 svc = __ip_vs_service_get(af, protocol, vaddr, vport);
1da177e4
LT
466
467 if (svc == NULL
468 && protocol == IPPROTO_TCP
469 && atomic_read(&ip_vs_ftpsvc_counter)
470 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
471 /*
472 * Check if ftp service entry exists, the packet
473 * might belong to FTP data connections.
474 */
3c2e0505 475 svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT);
1da177e4
LT
476 }
477
478 if (svc == NULL
479 && atomic_read(&ip_vs_nullsvc_counter)) {
480 /*
481 * Check if the catch-all port (port zero) exists
482 */
3c2e0505 483 svc = __ip_vs_service_get(af, protocol, vaddr, 0);
1da177e4
LT
484 }
485
486 out:
487 read_unlock(&__ip_vs_svc_lock);
488
3c2e0505
JV
489 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
490 fwmark, ip_vs_proto_name(protocol),
491 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
492 svc ? "hit" : "not hit");
1da177e4
LT
493
494 return svc;
495}
496
497
498static inline void
499__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
500{
501 atomic_inc(&svc->refcnt);
502 dest->svc = svc;
503}
504
505static inline void
506__ip_vs_unbind_svc(struct ip_vs_dest *dest)
507{
508 struct ip_vs_service *svc = dest->svc;
509
510 dest->svc = NULL;
511 if (atomic_dec_and_test(&svc->refcnt))
512 kfree(svc);
513}
514
515
516/*
517 * Returns hash value for real service
518 */
7937df15
JV
519static inline unsigned ip_vs_rs_hashkey(int af,
520 const union nf_inet_addr *addr,
521 __be16 port)
1da177e4
LT
522{
523 register unsigned porth = ntohs(port);
7937df15
JV
524 __be32 addr_fold = addr->ip;
525
526#ifdef CONFIG_IP_VS_IPV6
527 if (af == AF_INET6)
528 addr_fold = addr->ip6[0]^addr->ip6[1]^
529 addr->ip6[2]^addr->ip6[3];
530#endif
1da177e4 531
7937df15 532 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
1da177e4
LT
533 & IP_VS_RTAB_MASK;
534}
535
536/*
537 * Hashes ip_vs_dest in ip_vs_rtable by <af,addr,port>.
538 * should be called with locked tables.
539 */
540static int ip_vs_rs_hash(struct ip_vs_dest *dest)
541{
542 unsigned hash;
543
544 if (!list_empty(&dest->d_list)) {
545 return 0;
546 }
547
548 /*
549 * Hash by proto,addr,port,
550 * which are the parameters of the real service.
551 */
7937df15
JV
552 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
553
1da177e4
LT
554 list_add(&dest->d_list, &ip_vs_rtable[hash]);
555
556 return 1;
557}
558
559/*
560 * UNhashes ip_vs_dest from ip_vs_rtable.
561 * should be called with locked tables.
562 */
563static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
564{
565 /*
566 * Remove it from the ip_vs_rtable table.
567 */
568 if (!list_empty(&dest->d_list)) {
569 list_del(&dest->d_list);
570 INIT_LIST_HEAD(&dest->d_list);
571 }
572
573 return 1;
574}
575
576/*
577 * Lookup real service by <proto,addr,port> in the real service table.
578 */
579struct ip_vs_dest *
7937df15
JV
580ip_vs_lookup_real_service(int af, __u16 protocol,
581 const union nf_inet_addr *daddr,
582 __be16 dport)
1da177e4
LT
583{
584 unsigned hash;
585 struct ip_vs_dest *dest;
586
587 /*
588 * Check for "full" addressed entries
589 * Return the first found entry
590 */
7937df15 591 hash = ip_vs_rs_hashkey(af, daddr, dport);
1da177e4
LT
592
593 read_lock(&__ip_vs_rs_lock);
594 list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
7937df15
JV
595 if ((dest->af == af)
596 && ip_vs_addr_equal(af, &dest->addr, daddr)
1da177e4
LT
597 && (dest->port == dport)
598 && ((dest->protocol == protocol) ||
599 dest->vfwmark)) {
600 /* HIT */
601 read_unlock(&__ip_vs_rs_lock);
602 return dest;
603 }
604 }
605 read_unlock(&__ip_vs_rs_lock);
606
607 return NULL;
608}
609
610/*
611 * Lookup destination by {addr,port} in the given service
612 */
613static struct ip_vs_dest *
7937df15
JV
614ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
615 __be16 dport)
1da177e4
LT
616{
617 struct ip_vs_dest *dest;
618
619 /*
620 * Find the destination for the given service
621 */
622 list_for_each_entry(dest, &svc->destinations, n_list) {
7937df15
JV
623 if ((dest->af == svc->af)
624 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
625 && (dest->port == dport)) {
1da177e4
LT
626 /* HIT */
627 return dest;
628 }
629 }
630
631 return NULL;
632}
633
1e356f9c
RB
634/*
635 * Find destination by {daddr,dport,vaddr,protocol}
636 * Created to be used in ip_vs_process_message() in
637 * the backup synchronization daemon. It finds the
638 * destination to be bound to the received connection
639 * on the backup.
640 *
641 * ip_vs_lookup_real_service() looked promising, but
642 * seems not working as expected.
643 */
7937df15
JV
644struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
645 __be16 dport,
646 const union nf_inet_addr *vaddr,
647 __be16 vport, __u16 protocol)
1e356f9c
RB
648{
649 struct ip_vs_dest *dest;
650 struct ip_vs_service *svc;
651
7937df15 652 svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
1e356f9c
RB
653 if (!svc)
654 return NULL;
655 dest = ip_vs_lookup_dest(svc, daddr, dport);
656 if (dest)
657 atomic_inc(&dest->refcnt);
658 ip_vs_service_put(svc);
659 return dest;
660}
1da177e4
LT
661
662/*
663 * Lookup dest by {svc,addr,port} in the destination trash.
664 * The destination trash is used to hold the destinations that are removed
665 * from the service table but are still referenced by some conn entries.
666 * The reason to add the destination trash is when the dest is temporary
667 * down (either by administrator or by monitor program), the dest can be
668 * picked back from the trash, the remaining connections to the dest can
669 * continue, and the counting information of the dest is also useful for
670 * scheduling.
671 */
672static struct ip_vs_dest *
7937df15
JV
673ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
674 __be16 dport)
1da177e4
LT
675{
676 struct ip_vs_dest *dest, *nxt;
677
678 /*
679 * Find the destination in trash
680 */
681 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
7937df15
JV
682 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
683 "dest->refcnt=%d\n",
684 dest->vfwmark,
685 IP_VS_DBG_ADDR(svc->af, &dest->addr),
686 ntohs(dest->port),
687 atomic_read(&dest->refcnt));
688 if (dest->af == svc->af &&
689 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
1da177e4
LT
690 dest->port == dport &&
691 dest->vfwmark == svc->fwmark &&
692 dest->protocol == svc->protocol &&
693 (svc->fwmark ||
7937df15 694 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
1da177e4
LT
695 dest->vport == svc->port))) {
696 /* HIT */
697 return dest;
698 }
699
700 /*
701 * Try to purge the destination from trash if not referenced
702 */
703 if (atomic_read(&dest->refcnt) == 1) {
7937df15
JV
704 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
705 "from trash\n",
706 dest->vfwmark,
707 IP_VS_DBG_ADDR(svc->af, &dest->addr),
708 ntohs(dest->port));
1da177e4
LT
709 list_del(&dest->n_list);
710 ip_vs_dst_reset(dest);
711 __ip_vs_unbind_svc(dest);
712 kfree(dest);
713 }
714 }
715
716 return NULL;
717}
718
719
720/*
721 * Clean up all the destinations in the trash
722 * Called by the ip_vs_control_cleanup()
723 *
724 * When the ip_vs_control_cleanup is activated by ipvs module exit,
725 * the service tables must have been flushed and all the connections
726 * are expired, and the refcnt of each destination in the trash must
727 * be 1, so we simply release them here.
728 */
729static void ip_vs_trash_cleanup(void)
730{
731 struct ip_vs_dest *dest, *nxt;
732
733 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
734 list_del(&dest->n_list);
735 ip_vs_dst_reset(dest);
736 __ip_vs_unbind_svc(dest);
737 kfree(dest);
738 }
739}
740
741
742static void
743ip_vs_zero_stats(struct ip_vs_stats *stats)
744{
745 spin_lock_bh(&stats->lock);
e93615d0
SH
746
747 stats->conns = 0;
748 stats->inpkts = 0;
749 stats->outpkts = 0;
750 stats->inbytes = 0;
751 stats->outbytes = 0;
752
753 stats->cps = 0;
754 stats->inpps = 0;
755 stats->outpps = 0;
756 stats->inbps = 0;
757 stats->outbps = 0;
758
1da177e4 759 ip_vs_zero_estimator(stats);
e93615d0 760
3a14a313 761 spin_unlock_bh(&stats->lock);
1da177e4
LT
762}
763
764/*
765 * Update a destination in the given service
766 */
767static void
768__ip_vs_update_dest(struct ip_vs_service *svc,
c860c6b1 769 struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
770{
771 int conn_flags;
772
773 /* set the weight and the flags */
774 atomic_set(&dest->weight, udest->weight);
775 conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
776
777 /* check if local node and update the flags */
09571c7a
VB
778#ifdef CONFIG_IP_VS_IPV6
779 if (svc->af == AF_INET6) {
780 if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) {
781 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
782 | IP_VS_CONN_F_LOCALNODE;
783 }
784 } else
785#endif
786 if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
787 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
788 | IP_VS_CONN_F_LOCALNODE;
789 }
1da177e4
LT
790
791 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
792 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
793 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
794 } else {
795 /*
796 * Put the real service in ip_vs_rtable if not present.
797 * For now only for NAT!
798 */
799 write_lock_bh(&__ip_vs_rs_lock);
800 ip_vs_rs_hash(dest);
801 write_unlock_bh(&__ip_vs_rs_lock);
802 }
803 atomic_set(&dest->conn_flags, conn_flags);
804
805 /* bind the service */
806 if (!dest->svc) {
807 __ip_vs_bind_svc(dest, svc);
808 } else {
809 if (dest->svc != svc) {
810 __ip_vs_unbind_svc(dest);
811 ip_vs_zero_stats(&dest->stats);
812 __ip_vs_bind_svc(dest, svc);
813 }
814 }
815
816 /* set the dest status flags */
817 dest->flags |= IP_VS_DEST_F_AVAILABLE;
818
819 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
820 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
821 dest->u_threshold = udest->u_threshold;
822 dest->l_threshold = udest->l_threshold;
823}
824
825
826/*
827 * Create a destination for the given service
828 */
829static int
c860c6b1 830ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
1da177e4
LT
831 struct ip_vs_dest **dest_p)
832{
833 struct ip_vs_dest *dest;
834 unsigned atype;
835
836 EnterFunction(2);
837
09571c7a
VB
838#ifdef CONFIG_IP_VS_IPV6
839 if (svc->af == AF_INET6) {
840 atype = ipv6_addr_type(&udest->addr.in6);
841 if (!(atype & IPV6_ADDR_UNICAST) &&
842 !__ip_vs_addr_is_local_v6(&udest->addr.in6))
843 return -EINVAL;
844 } else
845#endif
846 {
847 atype = inet_addr_type(&init_net, udest->addr.ip);
848 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
849 return -EINVAL;
850 }
1da177e4 851
0da974f4 852 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
1da177e4
LT
853 if (dest == NULL) {
854 IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
855 return -ENOMEM;
856 }
1da177e4 857
c860c6b1 858 dest->af = svc->af;
1da177e4 859 dest->protocol = svc->protocol;
c860c6b1 860 dest->vaddr = svc->addr;
1da177e4
LT
861 dest->vport = svc->port;
862 dest->vfwmark = svc->fwmark;
c860c6b1 863 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
1da177e4
LT
864 dest->port = udest->port;
865
866 atomic_set(&dest->activeconns, 0);
867 atomic_set(&dest->inactconns, 0);
868 atomic_set(&dest->persistconns, 0);
869 atomic_set(&dest->refcnt, 0);
870
871 INIT_LIST_HEAD(&dest->d_list);
872 spin_lock_init(&dest->dst_lock);
873 spin_lock_init(&dest->stats.lock);
874 __ip_vs_update_dest(svc, dest, udest);
875 ip_vs_new_estimator(&dest->stats);
876
877 *dest_p = dest;
878
879 LeaveFunction(2);
880 return 0;
881}
882
883
884/*
885 * Add a destination into an existing service
886 */
887static int
c860c6b1 888ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
889{
890 struct ip_vs_dest *dest;
c860c6b1 891 union nf_inet_addr daddr;
014d730d 892 __be16 dport = udest->port;
1da177e4
LT
893 int ret;
894
895 EnterFunction(2);
896
897 if (udest->weight < 0) {
898 IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
899 return -ERANGE;
900 }
901
902 if (udest->l_threshold > udest->u_threshold) {
903 IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than "
904 "upper threshold\n");
905 return -ERANGE;
906 }
907
c860c6b1
JV
908 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
909
1da177e4
LT
910 /*
911 * Check if the dest already exists in the list
912 */
7937df15
JV
913 dest = ip_vs_lookup_dest(svc, &daddr, dport);
914
1da177e4
LT
915 if (dest != NULL) {
916 IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
917 return -EEXIST;
918 }
919
920 /*
921 * Check if the dest already exists in the trash and
922 * is from the same service
923 */
7937df15
JV
924 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
925
1da177e4
LT
926 if (dest != NULL) {
927 IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
4b5bdf5c 928 "dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
1da177e4
LT
929 NIPQUAD(daddr), ntohs(dport),
930 atomic_read(&dest->refcnt),
931 dest->vfwmark,
e7ade46a 932 NIPQUAD(dest->vaddr.ip),
1da177e4
LT
933 ntohs(dest->vport));
934 __ip_vs_update_dest(svc, dest, udest);
935
936 /*
937 * Get the destination from the trash
938 */
939 list_del(&dest->n_list);
940
941 ip_vs_new_estimator(&dest->stats);
942
943 write_lock_bh(&__ip_vs_svc_lock);
944
945 /*
946 * Wait until all other svc users go away.
947 */
948 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
949
950 list_add(&dest->n_list, &svc->destinations);
951 svc->num_dests++;
952
953 /* call the update_service function of its scheduler */
82dfb6f3
SW
954 if (svc->scheduler->update_service)
955 svc->scheduler->update_service(svc);
1da177e4
LT
956
957 write_unlock_bh(&__ip_vs_svc_lock);
958 return 0;
959 }
960
961 /*
962 * Allocate and initialize the dest structure
963 */
964 ret = ip_vs_new_dest(svc, udest, &dest);
965 if (ret) {
966 return ret;
967 }
968
969 /*
970 * Add the dest entry into the list
971 */
972 atomic_inc(&dest->refcnt);
973
974 write_lock_bh(&__ip_vs_svc_lock);
975
976 /*
977 * Wait until all other svc users go away.
978 */
979 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
980
981 list_add(&dest->n_list, &svc->destinations);
982 svc->num_dests++;
983
984 /* call the update_service function of its scheduler */
82dfb6f3
SW
985 if (svc->scheduler->update_service)
986 svc->scheduler->update_service(svc);
1da177e4
LT
987
988 write_unlock_bh(&__ip_vs_svc_lock);
989
990 LeaveFunction(2);
991
992 return 0;
993}
994
995
996/*
997 * Edit a destination in the given service
998 */
999static int
c860c6b1 1000ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
1001{
1002 struct ip_vs_dest *dest;
c860c6b1 1003 union nf_inet_addr daddr;
014d730d 1004 __be16 dport = udest->port;
1da177e4
LT
1005
1006 EnterFunction(2);
1007
1008 if (udest->weight < 0) {
1009 IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
1010 return -ERANGE;
1011 }
1012
1013 if (udest->l_threshold > udest->u_threshold) {
1014 IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than "
1015 "upper threshold\n");
1016 return -ERANGE;
1017 }
1018
c860c6b1
JV
1019 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
1020
1da177e4
LT
1021 /*
1022 * Lookup the destination list
1023 */
7937df15
JV
1024 dest = ip_vs_lookup_dest(svc, &daddr, dport);
1025
1da177e4
LT
1026 if (dest == NULL) {
1027 IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
1028 return -ENOENT;
1029 }
1030
1031 __ip_vs_update_dest(svc, dest, udest);
1032
1033 write_lock_bh(&__ip_vs_svc_lock);
1034
1035 /* Wait until all other svc users go away */
cae7ca3d 1036 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1da177e4
LT
1037
1038 /* call the update_service, because server weight may be changed */
82dfb6f3
SW
1039 if (svc->scheduler->update_service)
1040 svc->scheduler->update_service(svc);
1da177e4
LT
1041
1042 write_unlock_bh(&__ip_vs_svc_lock);
1043
1044 LeaveFunction(2);
1045
1046 return 0;
1047}
1048
1049
1050/*
1051 * Delete a destination (must be already unlinked from the service)
1052 */
1053static void __ip_vs_del_dest(struct ip_vs_dest *dest)
1054{
1055 ip_vs_kill_estimator(&dest->stats);
1056
1057 /*
1058 * Remove it from the d-linked list with the real services.
1059 */
1060 write_lock_bh(&__ip_vs_rs_lock);
1061 ip_vs_rs_unhash(dest);
1062 write_unlock_bh(&__ip_vs_rs_lock);
1063
1064 /*
1065 * Decrease the refcnt of the dest, and free the dest
1066 * if nobody refers to it (refcnt=0). Otherwise, throw
1067 * the destination into the trash.
1068 */
1069 if (atomic_dec_and_test(&dest->refcnt)) {
1070 ip_vs_dst_reset(dest);
1071 /* simply decrease svc->refcnt here, let the caller check
1072 and release the service if nobody refers to it.
1073 Only user context can release destination and service,
1074 and only one user context can update virtual service at a
1075 time, so the operation here is OK */
1076 atomic_dec(&dest->svc->refcnt);
1077 kfree(dest);
1078 } else {
4b5bdf5c
RN
1079 IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, "
1080 "dest->refcnt=%d\n",
e7ade46a 1081 NIPQUAD(dest->addr.ip), ntohs(dest->port),
1da177e4
LT
1082 atomic_read(&dest->refcnt));
1083 list_add(&dest->n_list, &ip_vs_dest_trash);
1084 atomic_inc(&dest->refcnt);
1085 }
1086}
1087
1088
1089/*
1090 * Unlink a destination from the given service
1091 */
1092static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1093 struct ip_vs_dest *dest,
1094 int svcupd)
1095{
1096 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1097
1098 /*
1099 * Remove it from the d-linked destination list.
1100 */
1101 list_del(&dest->n_list);
1102 svc->num_dests--;
82dfb6f3
SW
1103
1104 /*
1105 * Call the update_service function of its scheduler
1106 */
1107 if (svcupd && svc->scheduler->update_service)
1108 svc->scheduler->update_service(svc);
1da177e4
LT
1109}
1110
1111
1112/*
1113 * Delete a destination server in the given service
1114 */
1115static int
c860c6b1 1116ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
1117{
1118 struct ip_vs_dest *dest;
014d730d 1119 __be16 dport = udest->port;
1da177e4
LT
1120
1121 EnterFunction(2);
1122
7937df15 1123 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
c860c6b1 1124
1da177e4
LT
1125 if (dest == NULL) {
1126 IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
1127 return -ENOENT;
1128 }
1129
1130 write_lock_bh(&__ip_vs_svc_lock);
1131
1132 /*
1133 * Wait until all other svc users go away.
1134 */
1135 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1136
1137 /*
1138 * Unlink dest from the service
1139 */
1140 __ip_vs_unlink_dest(svc, dest, 1);
1141
1142 write_unlock_bh(&__ip_vs_svc_lock);
1143
1144 /*
1145 * Delete the destination
1146 */
1147 __ip_vs_del_dest(dest);
1148
1149 LeaveFunction(2);
1150
1151 return 0;
1152}
1153
1154
1155/*
1156 * Add a service into the service hash table
1157 */
1158static int
c860c6b1
JV
1159ip_vs_add_service(struct ip_vs_service_user_kern *u,
1160 struct ip_vs_service **svc_p)
1da177e4
LT
1161{
1162 int ret = 0;
1163 struct ip_vs_scheduler *sched = NULL;
1164 struct ip_vs_service *svc = NULL;
1165
1166 /* increase the module use count */
1167 ip_vs_use_count_inc();
1168
1169 /* Lookup the scheduler by 'u->sched_name' */
1170 sched = ip_vs_scheduler_get(u->sched_name);
1171 if (sched == NULL) {
1172 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1173 u->sched_name);
1174 ret = -ENOENT;
1175 goto out_mod_dec;
1176 }
1177
0da974f4 1178 svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
1da177e4
LT
1179 if (svc == NULL) {
1180 IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
1181 ret = -ENOMEM;
1182 goto out_err;
1183 }
1da177e4
LT
1184
1185 /* I'm the first user of the service */
1186 atomic_set(&svc->usecnt, 1);
1187 atomic_set(&svc->refcnt, 0);
1188
c860c6b1 1189 svc->af = u->af;
1da177e4 1190 svc->protocol = u->protocol;
c860c6b1 1191 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1da177e4
LT
1192 svc->port = u->port;
1193 svc->fwmark = u->fwmark;
1194 svc->flags = u->flags;
1195 svc->timeout = u->timeout * HZ;
1196 svc->netmask = u->netmask;
1197
1198 INIT_LIST_HEAD(&svc->destinations);
1199 rwlock_init(&svc->sched_lock);
1200 spin_lock_init(&svc->stats.lock);
1201
1202 /* Bind the scheduler */
1203 ret = ip_vs_bind_scheduler(svc, sched);
1204 if (ret)
1205 goto out_err;
1206 sched = NULL;
1207
1208 /* Update the virtual service counters */
1209 if (svc->port == FTPPORT)
1210 atomic_inc(&ip_vs_ftpsvc_counter);
1211 else if (svc->port == 0)
1212 atomic_inc(&ip_vs_nullsvc_counter);
1213
1214 ip_vs_new_estimator(&svc->stats);
1215 ip_vs_num_services++;
1216
1217 /* Hash the service into the service table */
1218 write_lock_bh(&__ip_vs_svc_lock);
1219 ip_vs_svc_hash(svc);
1220 write_unlock_bh(&__ip_vs_svc_lock);
1221
1222 *svc_p = svc;
1223 return 0;
1224
1225 out_err:
1226 if (svc != NULL) {
1227 if (svc->scheduler)
1228 ip_vs_unbind_scheduler(svc);
1229 if (svc->inc) {
1230 local_bh_disable();
1231 ip_vs_app_inc_put(svc->inc);
1232 local_bh_enable();
1233 }
1234 kfree(svc);
1235 }
1236 ip_vs_scheduler_put(sched);
1237
1238 out_mod_dec:
1239 /* decrease the module use count */
1240 ip_vs_use_count_dec();
1241
1242 return ret;
1243}
1244
1245
1246/*
1247 * Edit a service and bind it with a new scheduler
1248 */
1249static int
c860c6b1 1250ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1da177e4
LT
1251{
1252 struct ip_vs_scheduler *sched, *old_sched;
1253 int ret = 0;
1254
1255 /*
1256 * Lookup the scheduler, by 'u->sched_name'
1257 */
1258 sched = ip_vs_scheduler_get(u->sched_name);
1259 if (sched == NULL) {
1260 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1261 u->sched_name);
1262 return -ENOENT;
1263 }
1264 old_sched = sched;
1265
1266 write_lock_bh(&__ip_vs_svc_lock);
1267
1268 /*
1269 * Wait until all other svc users go away.
1270 */
1271 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1272
1273 /*
1274 * Set the flags and timeout value
1275 */
1276 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1277 svc->timeout = u->timeout * HZ;
1278 svc->netmask = u->netmask;
1279
1280 old_sched = svc->scheduler;
1281 if (sched != old_sched) {
1282 /*
1283 * Unbind the old scheduler
1284 */
1285 if ((ret = ip_vs_unbind_scheduler(svc))) {
1286 old_sched = sched;
1287 goto out;
1288 }
1289
1290 /*
1291 * Bind the new scheduler
1292 */
1293 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1294 /*
1295 * If ip_vs_bind_scheduler fails, restore the old
1296 * scheduler.
1297 * The main reason of failure is out of memory.
1298 *
1299 * The question is if the old scheduler can be
1300 * restored all the time. TODO: if it cannot be
1301 * restored some time, we must delete the service,
1302 * otherwise the system may crash.
1303 */
1304 ip_vs_bind_scheduler(svc, old_sched);
1305 old_sched = sched;
1306 goto out;
1307 }
1308 }
1309
1310 out:
1311 write_unlock_bh(&__ip_vs_svc_lock);
1312
1313 if (old_sched)
1314 ip_vs_scheduler_put(old_sched);
1315
1316 return ret;
1317}
1318
1319
1320/*
1321 * Delete a service from the service list
1322 * - The service must be unlinked, unlocked and not referenced!
1323 * - We are called under _bh lock
1324 */
1325static void __ip_vs_del_service(struct ip_vs_service *svc)
1326{
1327 struct ip_vs_dest *dest, *nxt;
1328 struct ip_vs_scheduler *old_sched;
1329
1330 ip_vs_num_services--;
1331 ip_vs_kill_estimator(&svc->stats);
1332
1333 /* Unbind scheduler */
1334 old_sched = svc->scheduler;
1335 ip_vs_unbind_scheduler(svc);
1336 if (old_sched)
1337 ip_vs_scheduler_put(old_sched);
1338
1339 /* Unbind app inc */
1340 if (svc->inc) {
1341 ip_vs_app_inc_put(svc->inc);
1342 svc->inc = NULL;
1343 }
1344
1345 /*
1346 * Unlink the whole destination list
1347 */
1348 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1349 __ip_vs_unlink_dest(svc, dest, 0);
1350 __ip_vs_del_dest(dest);
1351 }
1352
1353 /*
1354 * Update the virtual service counters
1355 */
1356 if (svc->port == FTPPORT)
1357 atomic_dec(&ip_vs_ftpsvc_counter);
1358 else if (svc->port == 0)
1359 atomic_dec(&ip_vs_nullsvc_counter);
1360
1361 /*
1362 * Free the service if nobody refers to it
1363 */
1364 if (atomic_read(&svc->refcnt) == 0)
1365 kfree(svc);
1366
1367 /* decrease the module use count */
1368 ip_vs_use_count_dec();
1369}
1370
1371/*
1372 * Delete a service from the service list
1373 */
1374static int ip_vs_del_service(struct ip_vs_service *svc)
1375{
1376 if (svc == NULL)
1377 return -EEXIST;
1378
1379 /*
1380 * Unhash it from the service table
1381 */
1382 write_lock_bh(&__ip_vs_svc_lock);
1383
1384 ip_vs_svc_unhash(svc);
1385
1386 /*
1387 * Wait until all the svc users go away.
1388 */
1389 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1390
1391 __ip_vs_del_service(svc);
1392
1393 write_unlock_bh(&__ip_vs_svc_lock);
1394
1395 return 0;
1396}
1397
1398
1399/*
1400 * Flush all the virtual services
1401 */
1402static int ip_vs_flush(void)
1403{
1404 int idx;
1405 struct ip_vs_service *svc, *nxt;
1406
1407 /*
1408 * Flush the service table hashed by <protocol,addr,port>
1409 */
1410 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1411 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1412 write_lock_bh(&__ip_vs_svc_lock);
1413 ip_vs_svc_unhash(svc);
1414 /*
1415 * Wait until all the svc users go away.
1416 */
1417 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1418 __ip_vs_del_service(svc);
1419 write_unlock_bh(&__ip_vs_svc_lock);
1420 }
1421 }
1422
1423 /*
1424 * Flush the service table hashed by fwmark
1425 */
1426 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1427 list_for_each_entry_safe(svc, nxt,
1428 &ip_vs_svc_fwm_table[idx], f_list) {
1429 write_lock_bh(&__ip_vs_svc_lock);
1430 ip_vs_svc_unhash(svc);
1431 /*
1432 * Wait until all the svc users go away.
1433 */
1434 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1435 __ip_vs_del_service(svc);
1436 write_unlock_bh(&__ip_vs_svc_lock);
1437 }
1438 }
1439
1440 return 0;
1441}
1442
1443
1444/*
1445 * Zero counters in a service or all services
1446 */
1447static int ip_vs_zero_service(struct ip_vs_service *svc)
1448{
1449 struct ip_vs_dest *dest;
1450
1451 write_lock_bh(&__ip_vs_svc_lock);
1452 list_for_each_entry(dest, &svc->destinations, n_list) {
1453 ip_vs_zero_stats(&dest->stats);
1454 }
1455 ip_vs_zero_stats(&svc->stats);
1456 write_unlock_bh(&__ip_vs_svc_lock);
1457 return 0;
1458}
1459
1460static int ip_vs_zero_all(void)
1461{
1462 int idx;
1463 struct ip_vs_service *svc;
1464
1465 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1466 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1467 ip_vs_zero_service(svc);
1468 }
1469 }
1470
1471 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1472 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1473 ip_vs_zero_service(svc);
1474 }
1475 }
1476
1477 ip_vs_zero_stats(&ip_vs_stats);
1478 return 0;
1479}
1480
1481
1482static int
1483proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
1484 void __user *buffer, size_t *lenp, loff_t *ppos)
1485{
1486 int *valp = table->data;
1487 int val = *valp;
1488 int rc;
1489
1490 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1491 if (write && (*valp != val)) {
1492 if ((*valp < 0) || (*valp > 3)) {
1493 /* Restore the correct value */
1494 *valp = val;
1495 } else {
1da177e4 1496 update_defense_level();
1da177e4
LT
1497 }
1498 }
1499 return rc;
1500}
1501
1502
1503static int
1504proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
1505 void __user *buffer, size_t *lenp, loff_t *ppos)
1506{
1507 int *valp = table->data;
1508 int val[2];
1509 int rc;
1510
1511 /* backup the value first */
1512 memcpy(val, valp, sizeof(val));
1513
1514 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1515 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1516 /* Restore the correct value */
1517 memcpy(valp, val, sizeof(val));
1518 }
1519 return rc;
1520}
1521
1522
1523/*
1524 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1525 */
1526
1527static struct ctl_table vs_vars[] = {
1528 {
1da177e4
LT
1529 .procname = "amemthresh",
1530 .data = &sysctl_ip_vs_amemthresh,
1531 .maxlen = sizeof(int),
1532 .mode = 0644,
1533 .proc_handler = &proc_dointvec,
1534 },
1535#ifdef CONFIG_IP_VS_DEBUG
1536 {
1da177e4
LT
1537 .procname = "debug_level",
1538 .data = &sysctl_ip_vs_debug_level,
1539 .maxlen = sizeof(int),
1540 .mode = 0644,
1541 .proc_handler = &proc_dointvec,
1542 },
1543#endif
1544 {
1da177e4
LT
1545 .procname = "am_droprate",
1546 .data = &sysctl_ip_vs_am_droprate,
1547 .maxlen = sizeof(int),
1548 .mode = 0644,
1549 .proc_handler = &proc_dointvec,
1550 },
1551 {
1da177e4
LT
1552 .procname = "drop_entry",
1553 .data = &sysctl_ip_vs_drop_entry,
1554 .maxlen = sizeof(int),
1555 .mode = 0644,
1556 .proc_handler = &proc_do_defense_mode,
1557 },
1558 {
1da177e4
LT
1559 .procname = "drop_packet",
1560 .data = &sysctl_ip_vs_drop_packet,
1561 .maxlen = sizeof(int),
1562 .mode = 0644,
1563 .proc_handler = &proc_do_defense_mode,
1564 },
1565 {
1da177e4
LT
1566 .procname = "secure_tcp",
1567 .data = &sysctl_ip_vs_secure_tcp,
1568 .maxlen = sizeof(int),
1569 .mode = 0644,
1570 .proc_handler = &proc_do_defense_mode,
1571 },
1572#if 0
1573 {
1da177e4
LT
1574 .procname = "timeout_established",
1575 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1576 .maxlen = sizeof(int),
1577 .mode = 0644,
1578 .proc_handler = &proc_dointvec_jiffies,
1579 },
1580 {
1da177e4
LT
1581 .procname = "timeout_synsent",
1582 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1583 .maxlen = sizeof(int),
1584 .mode = 0644,
1585 .proc_handler = &proc_dointvec_jiffies,
1586 },
1587 {
1da177e4
LT
1588 .procname = "timeout_synrecv",
1589 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1590 .maxlen = sizeof(int),
1591 .mode = 0644,
1592 .proc_handler = &proc_dointvec_jiffies,
1593 },
1594 {
1da177e4
LT
1595 .procname = "timeout_finwait",
1596 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1597 .maxlen = sizeof(int),
1598 .mode = 0644,
1599 .proc_handler = &proc_dointvec_jiffies,
1600 },
1601 {
1da177e4
LT
1602 .procname = "timeout_timewait",
1603 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1604 .maxlen = sizeof(int),
1605 .mode = 0644,
1606 .proc_handler = &proc_dointvec_jiffies,
1607 },
1608 {
1da177e4
LT
1609 .procname = "timeout_close",
1610 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1611 .maxlen = sizeof(int),
1612 .mode = 0644,
1613 .proc_handler = &proc_dointvec_jiffies,
1614 },
1615 {
1da177e4
LT
1616 .procname = "timeout_closewait",
1617 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1618 .maxlen = sizeof(int),
1619 .mode = 0644,
1620 .proc_handler = &proc_dointvec_jiffies,
1621 },
1622 {
1da177e4
LT
1623 .procname = "timeout_lastack",
1624 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1625 .maxlen = sizeof(int),
1626 .mode = 0644,
1627 .proc_handler = &proc_dointvec_jiffies,
1628 },
1629 {
1da177e4
LT
1630 .procname = "timeout_listen",
1631 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1632 .maxlen = sizeof(int),
1633 .mode = 0644,
1634 .proc_handler = &proc_dointvec_jiffies,
1635 },
1636 {
1da177e4
LT
1637 .procname = "timeout_synack",
1638 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1639 .maxlen = sizeof(int),
1640 .mode = 0644,
1641 .proc_handler = &proc_dointvec_jiffies,
1642 },
1643 {
1da177e4
LT
1644 .procname = "timeout_udp",
1645 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1646 .maxlen = sizeof(int),
1647 .mode = 0644,
1648 .proc_handler = &proc_dointvec_jiffies,
1649 },
1650 {
1da177e4
LT
1651 .procname = "timeout_icmp",
1652 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1653 .maxlen = sizeof(int),
1654 .mode = 0644,
1655 .proc_handler = &proc_dointvec_jiffies,
1656 },
1657#endif
1658 {
1da177e4
LT
1659 .procname = "cache_bypass",
1660 .data = &sysctl_ip_vs_cache_bypass,
1661 .maxlen = sizeof(int),
1662 .mode = 0644,
1663 .proc_handler = &proc_dointvec,
1664 },
1665 {
1da177e4
LT
1666 .procname = "expire_nodest_conn",
1667 .data = &sysctl_ip_vs_expire_nodest_conn,
1668 .maxlen = sizeof(int),
1669 .mode = 0644,
1670 .proc_handler = &proc_dointvec,
1671 },
1672 {
1da177e4
LT
1673 .procname = "expire_quiescent_template",
1674 .data = &sysctl_ip_vs_expire_quiescent_template,
1675 .maxlen = sizeof(int),
1676 .mode = 0644,
1677 .proc_handler = &proc_dointvec,
1678 },
1679 {
1da177e4
LT
1680 .procname = "sync_threshold",
1681 .data = &sysctl_ip_vs_sync_threshold,
1682 .maxlen = sizeof(sysctl_ip_vs_sync_threshold),
1683 .mode = 0644,
1684 .proc_handler = &proc_do_sync_threshold,
1685 },
1686 {
1da177e4
LT
1687 .procname = "nat_icmp_send",
1688 .data = &sysctl_ip_vs_nat_icmp_send,
1689 .maxlen = sizeof(int),
1690 .mode = 0644,
1691 .proc_handler = &proc_dointvec,
1692 },
1693 { .ctl_name = 0 }
1694};
1695
5587da55 1696const struct ctl_path net_vs_ctl_path[] = {
90754f8e
PE
1697 { .procname = "net", .ctl_name = CTL_NET, },
1698 { .procname = "ipv4", .ctl_name = NET_IPV4, },
1699 { .procname = "vs", },
1700 { }
1da177e4 1701};
90754f8e 1702EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1da177e4
LT
1703
1704static struct ctl_table_header * sysctl_header;
1705
1706#ifdef CONFIG_PROC_FS
1707
/* Position of the seq_file walk over the two service tables */
struct ip_vs_iter {
	/* Table being walked: ip_vs_svc_table or ip_vs_svc_fwm_table */
	struct list_head	*table;
	/* Index of the current hash bucket within that table */
	int			bucket;
};
1712
1713/*
1714 * Write the contents of the VS rule table to a PROCfs file.
1715 * (It is kept just for backward compatibility)
1716 */
1717static inline const char *ip_vs_fwd_name(unsigned flags)
1718{
1719 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1720 case IP_VS_CONN_F_LOCALNODE:
1721 return "Local";
1722 case IP_VS_CONN_F_TUNNEL:
1723 return "Tunnel";
1724 case IP_VS_CONN_F_DROUTE:
1725 return "Route";
1726 default:
1727 return "Masq";
1728 }
1729}
1730
1731
1732/* Get the Nth entry in the two lists */
1733static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1734{
1735 struct ip_vs_iter *iter = seq->private;
1736 int idx;
1737 struct ip_vs_service *svc;
1738
1739 /* look in hash by protocol */
1740 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1741 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1742 if (pos-- == 0){
1743 iter->table = ip_vs_svc_table;
1744 iter->bucket = idx;
1745 return svc;
1746 }
1747 }
1748 }
1749
1750 /* keep looking in fwmark */
1751 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1752 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1753 if (pos-- == 0) {
1754 iter->table = ip_vs_svc_fwm_table;
1755 iter->bucket = idx;
1756 return svc;
1757 }
1758 }
1759 }
1760
1761 return NULL;
1762}
1763
1764static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1765{
1766
1767 read_lock_bh(&__ip_vs_svc_lock);
1768 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1769}
1770
1771
1772static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1773{
1774 struct list_head *e;
1775 struct ip_vs_iter *iter;
1776 struct ip_vs_service *svc;
1777
1778 ++*pos;
1779 if (v == SEQ_START_TOKEN)
1780 return ip_vs_info_array(seq,0);
1781
1782 svc = v;
1783 iter = seq->private;
1784
1785 if (iter->table == ip_vs_svc_table) {
1786 /* next service in table hashed by protocol */
1787 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1788 return list_entry(e, struct ip_vs_service, s_list);
1789
1790
1791 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1792 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1793 s_list) {
1794 return svc;
1795 }
1796 }
1797
1798 iter->table = ip_vs_svc_fwm_table;
1799 iter->bucket = -1;
1800 goto scan_fwmark;
1801 }
1802
1803 /* next service in hashed by fwmark */
1804 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1805 return list_entry(e, struct ip_vs_service, f_list);
1806
1807 scan_fwmark:
1808 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1809 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1810 f_list)
1811 return svc;
1812 }
1813
1814 return NULL;
1815}
1816
1817static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1818{
1819 read_unlock_bh(&__ip_vs_svc_lock);
1820}
1821
1822
1823static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1824{
1825 if (v == SEQ_START_TOKEN) {
1826 seq_printf(seq,
1827 "IP Virtual Server version %d.%d.%d (size=%d)\n",
1828 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
1829 seq_puts(seq,
1830 "Prot LocalAddress:Port Scheduler Flags\n");
1831 seq_puts(seq,
1832 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1833 } else {
1834 const struct ip_vs_service *svc = v;
1835 const struct ip_vs_iter *iter = seq->private;
1836 const struct ip_vs_dest *dest;
1837
667a5f18
VB
1838 if (iter->table == ip_vs_svc_table) {
1839#ifdef CONFIG_IP_VS_IPV6
1840 if (svc->af == AF_INET6)
1841 seq_printf(seq, "%s [" NIP6_FMT "]:%04X %s ",
1842 ip_vs_proto_name(svc->protocol),
1843 NIP6(svc->addr.in6),
1844 ntohs(svc->port),
1845 svc->scheduler->name);
1846 else
1847#endif
1848 seq_printf(seq, "%s %08X:%04X %s ",
1849 ip_vs_proto_name(svc->protocol),
1850 ntohl(svc->addr.ip),
1851 ntohs(svc->port),
1852 svc->scheduler->name);
1853 } else {
1da177e4
LT
1854 seq_printf(seq, "FWM %08X %s ",
1855 svc->fwmark, svc->scheduler->name);
667a5f18 1856 }
1da177e4
LT
1857
1858 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1859 seq_printf(seq, "persistent %d %08X\n",
1860 svc->timeout,
1861 ntohl(svc->netmask));
1862 else
1863 seq_putc(seq, '\n');
1864
1865 list_for_each_entry(dest, &svc->destinations, n_list) {
667a5f18
VB
1866#ifdef CONFIG_IP_VS_IPV6
1867 if (dest->af == AF_INET6)
1868 seq_printf(seq,
1869 " -> [" NIP6_FMT "]:%04X"
1870 " %-7s %-6d %-10d %-10d\n",
1871 NIP6(dest->addr.in6),
1872 ntohs(dest->port),
1873 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1874 atomic_read(&dest->weight),
1875 atomic_read(&dest->activeconns),
1876 atomic_read(&dest->inactconns));
1877 else
1878#endif
1879 seq_printf(seq,
1880 " -> %08X:%04X "
1881 "%-7s %-6d %-10d %-10d\n",
1882 ntohl(dest->addr.ip),
1883 ntohs(dest->port),
1884 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1885 atomic_read(&dest->weight),
1886 atomic_read(&dest->activeconns),
1887 atomic_read(&dest->inactconns));
1888
1da177e4
LT
1889 }
1890 }
1891 return 0;
1892}
1893
56b3d975 1894static const struct seq_operations ip_vs_info_seq_ops = {
1da177e4
LT
1895 .start = ip_vs_info_seq_start,
1896 .next = ip_vs_info_seq_next,
1897 .stop = ip_vs_info_seq_stop,
1898 .show = ip_vs_info_seq_show,
1899};
1900
1901static int ip_vs_info_open(struct inode *inode, struct file *file)
1902{
cf7732e4
PE
1903 return seq_open_private(file, &ip_vs_info_seq_ops,
1904 sizeof(struct ip_vs_iter));
1da177e4
LT
1905}
1906
9a32144e 1907static const struct file_operations ip_vs_info_fops = {
1da177e4
LT
1908 .owner = THIS_MODULE,
1909 .open = ip_vs_info_open,
1910 .read = seq_read,
1911 .llseek = seq_lseek,
1912 .release = seq_release_private,
1913};
1914
1915#endif
1916
519e49e8
SW
1917struct ip_vs_stats ip_vs_stats = {
1918 .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
1919};
1da177e4
LT
1920
1921#ifdef CONFIG_PROC_FS
1922static int ip_vs_stats_show(struct seq_file *seq, void *v)
1923{
1924
1925/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1926 seq_puts(seq,
1927 " Total Incoming Outgoing Incoming Outgoing\n");
1928 seq_printf(seq,
1929 " Conns Packets Packets Bytes Bytes\n");
1930
1931 spin_lock_bh(&ip_vs_stats.lock);
1932 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.conns,
1933 ip_vs_stats.inpkts, ip_vs_stats.outpkts,
1934 (unsigned long long) ip_vs_stats.inbytes,
1935 (unsigned long long) ip_vs_stats.outbytes);
1936
1937/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1938 seq_puts(seq,
1939 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1940 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
1941 ip_vs_stats.cps,
1942 ip_vs_stats.inpps,
1943 ip_vs_stats.outpps,
1944 ip_vs_stats.inbps,
1945 ip_vs_stats.outbps);
1946 spin_unlock_bh(&ip_vs_stats.lock);
1947
1948 return 0;
1949}
1950
1951static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1952{
1953 return single_open(file, ip_vs_stats_show, NULL);
1954}
1955
9a32144e 1956static const struct file_operations ip_vs_stats_fops = {
1da177e4
LT
1957 .owner = THIS_MODULE,
1958 .open = ip_vs_stats_seq_open,
1959 .read = seq_read,
1960 .llseek = seq_lseek,
1961 .release = single_release,
1962};
1963
1964#endif
1965
1966/*
1967 * Set timeout values for tcp tcpfin udp in the timeout_table.
1968 */
1969static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
1970{
1971 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
1972 u->tcp_timeout,
1973 u->tcp_fin_timeout,
1974 u->udp_timeout);
1975
1976#ifdef CONFIG_IP_VS_PROTO_TCP
1977 if (u->tcp_timeout) {
1978 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
1979 = u->tcp_timeout * HZ;
1980 }
1981
1982 if (u->tcp_fin_timeout) {
1983 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
1984 = u->tcp_fin_timeout * HZ;
1985 }
1986#endif
1987
1988#ifdef CONFIG_IP_VS_PROTO_UDP
1989 if (u->udp_timeout) {
1990 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
1991 = u->udp_timeout * HZ;
1992 }
1993#endif
1994 return 0;
1995}
1996
1997
/*
 * Helpers for the setsockopt() interface.
 *
 * SET_CMDID() turns a command number into an index into set_arglen[].
 * Its argument is parenthesized so that an expression argument cannot
 * change the meaning of the subtraction.
 * The *_ARG_LEN macros give the exact argument size of each command class;
 * MAX_ARG_LEN is the size of the largest one.
 */
#define SET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
#define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
				 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
#define MAX_ARG_LEN		SVCDEST_ARG_LEN
2005
9b5b5cff 2006static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
1da177e4
LT
2007 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
2008 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
2009 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
2010 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
2011 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
2012 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
2013 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
2014 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
2015 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
2016 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
2017 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
2018};
2019
c860c6b1
JV
2020static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2021 struct ip_vs_service_user *usvc_compat)
2022{
2023 usvc->af = AF_INET;
2024 usvc->protocol = usvc_compat->protocol;
2025 usvc->addr.ip = usvc_compat->addr;
2026 usvc->port = usvc_compat->port;
2027 usvc->fwmark = usvc_compat->fwmark;
2028
2029 /* Deep copy of sched_name is not needed here */
2030 usvc->sched_name = usvc_compat->sched_name;
2031
2032 usvc->flags = usvc_compat->flags;
2033 usvc->timeout = usvc_compat->timeout;
2034 usvc->netmask = usvc_compat->netmask;
2035}
2036
2037static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2038 struct ip_vs_dest_user *udest_compat)
2039{
2040 udest->addr.ip = udest_compat->addr;
2041 udest->port = udest_compat->port;
2042 udest->conn_flags = udest_compat->conn_flags;
2043 udest->weight = udest_compat->weight;
2044 udest->u_threshold = udest_compat->u_threshold;
2045 udest->l_threshold = udest_compat->l_threshold;
2046}
2047
1da177e4
LT
2048static int
2049do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2050{
2051 int ret;
2052 unsigned char arg[MAX_ARG_LEN];
c860c6b1
JV
2053 struct ip_vs_service_user *usvc_compat;
2054 struct ip_vs_service_user_kern usvc;
1da177e4 2055 struct ip_vs_service *svc;
c860c6b1
JV
2056 struct ip_vs_dest_user *udest_compat;
2057 struct ip_vs_dest_user_kern udest;
1da177e4
LT
2058
2059 if (!capable(CAP_NET_ADMIN))
2060 return -EPERM;
2061
2062 if (len != set_arglen[SET_CMDID(cmd)]) {
2063 IP_VS_ERR("set_ctl: len %u != %u\n",
2064 len, set_arglen[SET_CMDID(cmd)]);
2065 return -EINVAL;
2066 }
2067
2068 if (copy_from_user(arg, user, len) != 0)
2069 return -EFAULT;
2070
2071 /* increase the module use count */
2072 ip_vs_use_count_inc();
2073
14cc3e2b 2074 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
1da177e4
LT
2075 ret = -ERESTARTSYS;
2076 goto out_dec;
2077 }
2078
2079 if (cmd == IP_VS_SO_SET_FLUSH) {
2080 /* Flush the virtual service */
2081 ret = ip_vs_flush();
2082 goto out_unlock;
2083 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2084 /* Set timeout values for (tcp tcpfin udp) */
2085 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
2086 goto out_unlock;
2087 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2088 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2089 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
2090 goto out_unlock;
2091 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2092 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2093 ret = stop_sync_thread(dm->state);
2094 goto out_unlock;
2095 }
2096
c860c6b1
JV
2097 usvc_compat = (struct ip_vs_service_user *)arg;
2098 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2099
2100 /* We only use the new structs internally, so copy userspace compat
2101 * structs to extended internal versions */
2102 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2103 ip_vs_copy_udest_compat(&udest, udest_compat);
1da177e4
LT
2104
2105 if (cmd == IP_VS_SO_SET_ZERO) {
2106 /* if no service address is set, zero counters in all */
c860c6b1 2107 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
1da177e4
LT
2108 ret = ip_vs_zero_all();
2109 goto out_unlock;
2110 }
2111 }
2112
2113 /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
c860c6b1 2114 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) {
1da177e4 2115 IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
c860c6b1
JV
2116 usvc.protocol, NIPQUAD(usvc.addr.ip),
2117 ntohs(usvc.port), usvc.sched_name);
1da177e4
LT
2118 ret = -EFAULT;
2119 goto out_unlock;
2120 }
2121
2122 /* Lookup the exact service by <protocol, addr, port> or fwmark */
c860c6b1 2123 if (usvc.fwmark == 0)
b18610de
JV
2124 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
2125 &usvc.addr, usvc.port);
1da177e4 2126 else
b18610de 2127 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
1da177e4
LT
2128
2129 if (cmd != IP_VS_SO_SET_ADD
c860c6b1 2130 && (svc == NULL || svc->protocol != usvc.protocol)) {
1da177e4
LT
2131 ret = -ESRCH;
2132 goto out_unlock;
2133 }
2134
2135 switch (cmd) {
2136 case IP_VS_SO_SET_ADD:
2137 if (svc != NULL)
2138 ret = -EEXIST;
2139 else
c860c6b1 2140 ret = ip_vs_add_service(&usvc, &svc);
1da177e4
LT
2141 break;
2142 case IP_VS_SO_SET_EDIT:
c860c6b1 2143 ret = ip_vs_edit_service(svc, &usvc);
1da177e4
LT
2144 break;
2145 case IP_VS_SO_SET_DEL:
2146 ret = ip_vs_del_service(svc);
2147 if (!ret)
2148 goto out_unlock;
2149 break;
2150 case IP_VS_SO_SET_ZERO:
2151 ret = ip_vs_zero_service(svc);
2152 break;
2153 case IP_VS_SO_SET_ADDDEST:
c860c6b1 2154 ret = ip_vs_add_dest(svc, &udest);
1da177e4
LT
2155 break;
2156 case IP_VS_SO_SET_EDITDEST:
c860c6b1 2157 ret = ip_vs_edit_dest(svc, &udest);
1da177e4
LT
2158 break;
2159 case IP_VS_SO_SET_DELDEST:
c860c6b1 2160 ret = ip_vs_del_dest(svc, &udest);
1da177e4
LT
2161 break;
2162 default:
2163 ret = -EINVAL;
2164 }
2165
2166 if (svc)
2167 ip_vs_service_put(svc);
2168
2169 out_unlock:
14cc3e2b 2170 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2171 out_dec:
2172 /* decrease the module use count */
2173 ip_vs_use_count_dec();
2174
2175 return ret;
2176}
2177
2178
2179static void
2180ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2181{
2182 spin_lock_bh(&src->lock);
2183 memcpy(dst, src, (char*)&src->lock - (char*)src);
2184 spin_unlock_bh(&src->lock);
2185}
2186
2187static void
2188ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2189{
2190 dst->protocol = src->protocol;
e7ade46a 2191 dst->addr = src->addr.ip;
1da177e4
LT
2192 dst->port = src->port;
2193 dst->fwmark = src->fwmark;
4da62fc7 2194 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
1da177e4
LT
2195 dst->flags = src->flags;
2196 dst->timeout = src->timeout / HZ;
2197 dst->netmask = src->netmask;
2198 dst->num_dests = src->num_dests;
2199 ip_vs_copy_stats(&dst->stats, &src->stats);
2200}
2201
2202static inline int
2203__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2204 struct ip_vs_get_services __user *uptr)
2205{
2206 int idx, count=0;
2207 struct ip_vs_service *svc;
2208 struct ip_vs_service_entry entry;
2209 int ret = 0;
2210
2211 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2212 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2213 if (count >= get->num_services)
2214 goto out;
4da62fc7 2215 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2216 ip_vs_copy_service(&entry, svc);
2217 if (copy_to_user(&uptr->entrytable[count],
2218 &entry, sizeof(entry))) {
2219 ret = -EFAULT;
2220 goto out;
2221 }
2222 count++;
2223 }
2224 }
2225
2226 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2227 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2228 if (count >= get->num_services)
2229 goto out;
4da62fc7 2230 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2231 ip_vs_copy_service(&entry, svc);
2232 if (copy_to_user(&uptr->entrytable[count],
2233 &entry, sizeof(entry))) {
2234 ret = -EFAULT;
2235 goto out;
2236 }
2237 count++;
2238 }
2239 }
2240 out:
2241 return ret;
2242}
2243
2244static inline int
2245__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2246 struct ip_vs_get_dests __user *uptr)
2247{
2248 struct ip_vs_service *svc;
b18610de 2249 union nf_inet_addr addr = { .ip = get->addr };
1da177e4
LT
2250 int ret = 0;
2251
2252 if (get->fwmark)
b18610de 2253 svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark);
1da177e4 2254 else
b18610de
JV
2255 svc = __ip_vs_service_get(AF_INET, get->protocol, &addr,
2256 get->port);
2257
1da177e4
LT
2258 if (svc) {
2259 int count = 0;
2260 struct ip_vs_dest *dest;
2261 struct ip_vs_dest_entry entry;
2262
2263 list_for_each_entry(dest, &svc->destinations, n_list) {
2264 if (count >= get->num_dests)
2265 break;
2266
e7ade46a 2267 entry.addr = dest->addr.ip;
1da177e4
LT
2268 entry.port = dest->port;
2269 entry.conn_flags = atomic_read(&dest->conn_flags);
2270 entry.weight = atomic_read(&dest->weight);
2271 entry.u_threshold = dest->u_threshold;
2272 entry.l_threshold = dest->l_threshold;
2273 entry.activeconns = atomic_read(&dest->activeconns);
2274 entry.inactconns = atomic_read(&dest->inactconns);
2275 entry.persistconns = atomic_read(&dest->persistconns);
2276 ip_vs_copy_stats(&entry.stats, &dest->stats);
2277 if (copy_to_user(&uptr->entrytable[count],
2278 &entry, sizeof(entry))) {
2279 ret = -EFAULT;
2280 break;
2281 }
2282 count++;
2283 }
2284 ip_vs_service_put(svc);
2285 } else
2286 ret = -ESRCH;
2287 return ret;
2288}
2289
2290static inline void
2291__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2292{
2293#ifdef CONFIG_IP_VS_PROTO_TCP
2294 u->tcp_timeout =
2295 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2296 u->tcp_fin_timeout =
2297 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2298#endif
2299#ifdef CONFIG_IP_VS_PROTO_UDP
2300 u->udp_timeout =
2301 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2302#endif
2303}
2304
2305
/*
 * Helpers for the getsockopt() interface.
 *
 * GET_CMDID() turns a command number into an index into get_arglen[].
 * Its argument is parenthesized so that an expression argument cannot
 * change the meaning of the subtraction.
 * The GET_*_ARG_LEN macros give the minimum argument size of each command.
 */
#define GET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
#define GET_INFO_ARG_LEN	(sizeof(struct ip_vs_getinfo))
#define GET_SERVICES_ARG_LEN	(sizeof(struct ip_vs_get_services))
#define GET_SERVICE_ARG_LEN	(sizeof(struct ip_vs_service_entry))
#define GET_DESTS_ARG_LEN	(sizeof(struct ip_vs_get_dests))
#define GET_TIMEOUT_ARG_LEN	(sizeof(struct ip_vs_timeout_user))
#define GET_DAEMON_ARG_LEN	(sizeof(struct ip_vs_daemon_user) * 2)
2313
9b5b5cff 2314static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
1da177e4
LT
2315 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2316 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2317 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2318 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2319 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2320 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2321 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2322};
2323
2324static int
2325do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2326{
2327 unsigned char arg[128];
2328 int ret = 0;
2329
2330 if (!capable(CAP_NET_ADMIN))
2331 return -EPERM;
2332
2333 if (*len < get_arglen[GET_CMDID(cmd)]) {
2334 IP_VS_ERR("get_ctl: len %u < %u\n",
2335 *len, get_arglen[GET_CMDID(cmd)]);
2336 return -EINVAL;
2337 }
2338
2339 if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0)
2340 return -EFAULT;
2341
14cc3e2b 2342 if (mutex_lock_interruptible(&__ip_vs_mutex))
1da177e4
LT
2343 return -ERESTARTSYS;
2344
2345 switch (cmd) {
2346 case IP_VS_SO_GET_VERSION:
2347 {
2348 char buf[64];
2349
2350 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2351 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
2352 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2353 ret = -EFAULT;
2354 goto out;
2355 }
2356 *len = strlen(buf)+1;
2357 }
2358 break;
2359
2360 case IP_VS_SO_GET_INFO:
2361 {
2362 struct ip_vs_getinfo info;
2363 info.version = IP_VS_VERSION_CODE;
2364 info.size = IP_VS_CONN_TAB_SIZE;
2365 info.num_services = ip_vs_num_services;
2366 if (copy_to_user(user, &info, sizeof(info)) != 0)
2367 ret = -EFAULT;
2368 }
2369 break;
2370
2371 case IP_VS_SO_GET_SERVICES:
2372 {
2373 struct ip_vs_get_services *get;
2374 int size;
2375
2376 get = (struct ip_vs_get_services *)arg;
2377 size = sizeof(*get) +
2378 sizeof(struct ip_vs_service_entry) * get->num_services;
2379 if (*len != size) {
2380 IP_VS_ERR("length: %u != %u\n", *len, size);
2381 ret = -EINVAL;
2382 goto out;
2383 }
2384 ret = __ip_vs_get_service_entries(get, user);
2385 }
2386 break;
2387
2388 case IP_VS_SO_GET_SERVICE:
2389 {
2390 struct ip_vs_service_entry *entry;
2391 struct ip_vs_service *svc;
b18610de 2392 union nf_inet_addr addr;
1da177e4
LT
2393
2394 entry = (struct ip_vs_service_entry *)arg;
b18610de 2395 addr.ip = entry->addr;
1da177e4 2396 if (entry->fwmark)
b18610de 2397 svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark);
1da177e4 2398 else
b18610de
JV
2399 svc = __ip_vs_service_get(AF_INET, entry->protocol,
2400 &addr, entry->port);
1da177e4
LT
2401 if (svc) {
2402 ip_vs_copy_service(entry, svc);
2403 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2404 ret = -EFAULT;
2405 ip_vs_service_put(svc);
2406 } else
2407 ret = -ESRCH;
2408 }
2409 break;
2410
2411 case IP_VS_SO_GET_DESTS:
2412 {
2413 struct ip_vs_get_dests *get;
2414 int size;
2415
2416 get = (struct ip_vs_get_dests *)arg;
2417 size = sizeof(*get) +
2418 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2419 if (*len != size) {
2420 IP_VS_ERR("length: %u != %u\n", *len, size);
2421 ret = -EINVAL;
2422 goto out;
2423 }
2424 ret = __ip_vs_get_dest_entries(get, user);
2425 }
2426 break;
2427
2428 case IP_VS_SO_GET_TIMEOUT:
2429 {
2430 struct ip_vs_timeout_user t;
2431
2432 __ip_vs_get_timeouts(&t);
2433 if (copy_to_user(user, &t, sizeof(t)) != 0)
2434 ret = -EFAULT;
2435 }
2436 break;
2437
2438 case IP_VS_SO_GET_DAEMON:
2439 {
2440 struct ip_vs_daemon_user d[2];
2441
2442 memset(&d, 0, sizeof(d));
2443 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2444 d[0].state = IP_VS_STATE_MASTER;
4da62fc7 2445 strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
1da177e4
LT
2446 d[0].syncid = ip_vs_master_syncid;
2447 }
2448 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2449 d[1].state = IP_VS_STATE_BACKUP;
4da62fc7 2450 strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
1da177e4
LT
2451 d[1].syncid = ip_vs_backup_syncid;
2452 }
2453 if (copy_to_user(user, &d, sizeof(d)) != 0)
2454 ret = -EFAULT;
2455 }
2456 break;
2457
2458 default:
2459 ret = -EINVAL;
2460 }
2461
2462 out:
14cc3e2b 2463 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2464 return ret;
2465}
2466
2467
2468static struct nf_sockopt_ops ip_vs_sockopts = {
2469 .pf = PF_INET,
2470 .set_optmin = IP_VS_BASE_CTL,
2471 .set_optmax = IP_VS_SO_SET_MAX+1,
2472 .set = do_ip_vs_set_ctl,
2473 .get_optmin = IP_VS_BASE_CTL,
2474 .get_optmax = IP_VS_SO_GET_MAX+1,
2475 .get = do_ip_vs_get_ctl,
16fcec35 2476 .owner = THIS_MODULE,
1da177e4
LT
2477};
2478
9a812198
JV
2479/*
2480 * Generic Netlink interface
2481 */
2482
2483/* IPVS genetlink family */
2484static struct genl_family ip_vs_genl_family = {
2485 .id = GENL_ID_GENERATE,
2486 .hdrsize = 0,
2487 .name = IPVS_GENL_NAME,
2488 .version = IPVS_GENL_VERSION,
2489 .maxattr = IPVS_CMD_MAX,
2490};
2491
2492/* Policy used for first-level command attributes */
2493static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2494 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2495 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2496 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2497 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2498 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2499 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2500};
2501
2502/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2503static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2504 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2505 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2506 .len = IP_VS_IFNAME_MAXLEN },
2507 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2508};
2509
2510/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2511static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2512 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2513 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2514 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2515 .len = sizeof(union nf_inet_addr) },
2516 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2517 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2518 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2519 .len = IP_VS_SCHEDNAME_MAXLEN },
2520 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2521 .len = sizeof(struct ip_vs_flags) },
2522 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2523 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2524 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2525};
2526
2527/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2528static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2529 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2530 .len = sizeof(union nf_inet_addr) },
2531 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2532 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2533 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2534 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2535 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2536 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2537 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2538 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2539 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
2540};
2541
2542static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2543 struct ip_vs_stats *stats)
2544{
2545 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2546 if (!nl_stats)
2547 return -EMSGSIZE;
2548
2549 spin_lock_bh(&stats->lock);
2550
2551 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->conns);
2552 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->inpkts);
2553 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->outpkts);
2554 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->inbytes);
2555 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->outbytes);
2556 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->cps);
2557 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->inpps);
2558 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->outpps);
2559 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->inbps);
2560 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->outbps);
2561
2562 spin_unlock_bh(&stats->lock);
2563
2564 nla_nest_end(skb, nl_stats);
2565
2566 return 0;
2567
2568nla_put_failure:
2569 spin_unlock_bh(&stats->lock);
2570 nla_nest_cancel(skb, nl_stats);
2571 return -EMSGSIZE;
2572}
2573
2574static int ip_vs_genl_fill_service(struct sk_buff *skb,
2575 struct ip_vs_service *svc)
2576{
2577 struct nlattr *nl_service;
2578 struct ip_vs_flags flags = { .flags = svc->flags,
2579 .mask = ~0 };
2580
2581 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2582 if (!nl_service)
2583 return -EMSGSIZE;
2584
2585 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, AF_INET);
2586
2587 if (svc->fwmark) {
2588 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2589 } else {
2590 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2591 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2592 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2593 }
2594
2595 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2596 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2597 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2598 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2599
2600 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2601 goto nla_put_failure;
2602
2603 nla_nest_end(skb, nl_service);
2604
2605 return 0;
2606
2607nla_put_failure:
2608 nla_nest_cancel(skb, nl_service);
2609 return -EMSGSIZE;
2610}
2611
2612static int ip_vs_genl_dump_service(struct sk_buff *skb,
2613 struct ip_vs_service *svc,
2614 struct netlink_callback *cb)
2615{
2616 void *hdr;
2617
2618 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2619 &ip_vs_genl_family, NLM_F_MULTI,
2620 IPVS_CMD_NEW_SERVICE);
2621 if (!hdr)
2622 return -EMSGSIZE;
2623
2624 if (ip_vs_genl_fill_service(skb, svc) < 0)
2625 goto nla_put_failure;
2626
2627 return genlmsg_end(skb, hdr);
2628
2629nla_put_failure:
2630 genlmsg_cancel(skb, hdr);
2631 return -EMSGSIZE;
2632}
2633
2634static int ip_vs_genl_dump_services(struct sk_buff *skb,
2635 struct netlink_callback *cb)
2636{
2637 int idx = 0, i;
2638 int start = cb->args[0];
2639 struct ip_vs_service *svc;
2640
2641 mutex_lock(&__ip_vs_mutex);
2642 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2643 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2644 if (++idx <= start)
2645 continue;
2646 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2647 idx--;
2648 goto nla_put_failure;
2649 }
2650 }
2651 }
2652
2653 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2654 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2655 if (++idx <= start)
2656 continue;
2657 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2658 idx--;
2659 goto nla_put_failure;
2660 }
2661 }
2662 }
2663
2664nla_put_failure:
2665 mutex_unlock(&__ip_vs_mutex);
2666 cb->args[0] = idx;
2667
2668 return skb->len;
2669}
2670
c860c6b1 2671static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
9a812198
JV
2672 struct nlattr *nla, int full_entry)
2673{
2674 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2675 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2676
2677 /* Parse mandatory identifying service fields first */
2678 if (nla == NULL ||
2679 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2680 return -EINVAL;
2681
2682 nla_af = attrs[IPVS_SVC_ATTR_AF];
2683 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2684 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2685 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2686 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2687
2688 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2689 return -EINVAL;
2690
c860c6b1 2691 usvc->af = nla_get_u16(nla_af);
9a812198
JV
2692 /* For now, only support IPv4 */
2693 if (nla_get_u16(nla_af) != AF_INET)
2694 return -EAFNOSUPPORT;
2695
2696 if (nla_fwmark) {
2697 usvc->protocol = IPPROTO_TCP;
2698 usvc->fwmark = nla_get_u32(nla_fwmark);
2699 } else {
2700 usvc->protocol = nla_get_u16(nla_protocol);
2701 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2702 usvc->port = nla_get_u16(nla_port);
2703 usvc->fwmark = 0;
2704 }
2705
2706 /* If a full entry was requested, check for the additional fields */
2707 if (full_entry) {
2708 struct nlattr *nla_sched, *nla_flags, *nla_timeout,
2709 *nla_netmask;
2710 struct ip_vs_flags flags;
2711 struct ip_vs_service *svc;
2712
2713 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2714 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2715 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2716 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2717
2718 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2719 return -EINVAL;
2720
2721 nla_memcpy(&flags, nla_flags, sizeof(flags));
2722
2723 /* prefill flags from service if it already exists */
2724 if (usvc->fwmark)
b18610de 2725 svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark);
9a812198 2726 else
b18610de
JV
2727 svc = __ip_vs_service_get(usvc->af, usvc->protocol,
2728 &usvc->addr, usvc->port);
9a812198
JV
2729 if (svc) {
2730 usvc->flags = svc->flags;
2731 ip_vs_service_put(svc);
2732 } else
2733 usvc->flags = 0;
2734
2735 /* set new flags from userland */
2736 usvc->flags = (usvc->flags & ~flags.mask) |
2737 (flags.flags & flags.mask);
c860c6b1 2738 usvc->sched_name = nla_data(nla_sched);
9a812198
JV
2739 usvc->timeout = nla_get_u32(nla_timeout);
2740 usvc->netmask = nla_get_u32(nla_netmask);
2741 }
2742
2743 return 0;
2744}
2745
2746static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2747{
c860c6b1 2748 struct ip_vs_service_user_kern usvc;
9a812198
JV
2749 int ret;
2750
2751 ret = ip_vs_genl_parse_service(&usvc, nla, 0);
2752 if (ret)
2753 return ERR_PTR(ret);
2754
2755 if (usvc.fwmark)
b18610de 2756 return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
9a812198 2757 else
b18610de
JV
2758 return __ip_vs_service_get(usvc.af, usvc.protocol,
2759 &usvc.addr, usvc.port);
9a812198
JV
2760}
2761
2762static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2763{
2764 struct nlattr *nl_dest;
2765
2766 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2767 if (!nl_dest)
2768 return -EMSGSIZE;
2769
2770 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2771 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2772
2773 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2774 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2775 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2776 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2777 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2778 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2779 atomic_read(&dest->activeconns));
2780 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2781 atomic_read(&dest->inactconns));
2782 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2783 atomic_read(&dest->persistconns));
2784
2785 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2786 goto nla_put_failure;
2787
2788 nla_nest_end(skb, nl_dest);
2789
2790 return 0;
2791
2792nla_put_failure:
2793 nla_nest_cancel(skb, nl_dest);
2794 return -EMSGSIZE;
2795}
2796
2797static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2798 struct netlink_callback *cb)
2799{
2800 void *hdr;
2801
2802 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2803 &ip_vs_genl_family, NLM_F_MULTI,
2804 IPVS_CMD_NEW_DEST);
2805 if (!hdr)
2806 return -EMSGSIZE;
2807
2808 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2809 goto nla_put_failure;
2810
2811 return genlmsg_end(skb, hdr);
2812
2813nla_put_failure:
2814 genlmsg_cancel(skb, hdr);
2815 return -EMSGSIZE;
2816}
2817
2818static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2819 struct netlink_callback *cb)
2820{
2821 int idx = 0;
2822 int start = cb->args[0];
2823 struct ip_vs_service *svc;
2824 struct ip_vs_dest *dest;
2825 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2826
2827 mutex_lock(&__ip_vs_mutex);
2828
2829 /* Try to find the service for which to dump destinations */
2830 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2831 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2832 goto out_err;
2833
2834 svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
2835 if (IS_ERR(svc) || svc == NULL)
2836 goto out_err;
2837
2838 /* Dump the destinations */
2839 list_for_each_entry(dest, &svc->destinations, n_list) {
2840 if (++idx <= start)
2841 continue;
2842 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2843 idx--;
2844 goto nla_put_failure;
2845 }
2846 }
2847
2848nla_put_failure:
2849 cb->args[0] = idx;
2850 ip_vs_service_put(svc);
2851
2852out_err:
2853 mutex_unlock(&__ip_vs_mutex);
2854
2855 return skb->len;
2856}
2857
c860c6b1 2858static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
9a812198
JV
2859 struct nlattr *nla, int full_entry)
2860{
2861 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2862 struct nlattr *nla_addr, *nla_port;
2863
2864 /* Parse mandatory identifying destination fields first */
2865 if (nla == NULL ||
2866 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2867 return -EINVAL;
2868
2869 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
2870 nla_port = attrs[IPVS_DEST_ATTR_PORT];
2871
2872 if (!(nla_addr && nla_port))
2873 return -EINVAL;
2874
2875 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2876 udest->port = nla_get_u16(nla_port);
2877
2878 /* If a full entry was requested, check for the additional fields */
2879 if (full_entry) {
2880 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2881 *nla_l_thresh;
2882
2883 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
2884 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
2885 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
2886 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
2887
2888 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
2889 return -EINVAL;
2890
2891 udest->conn_flags = nla_get_u32(nla_fwd)
2892 & IP_VS_CONN_F_FWD_MASK;
2893 udest->weight = nla_get_u32(nla_weight);
2894 udest->u_threshold = nla_get_u32(nla_u_thresh);
2895 udest->l_threshold = nla_get_u32(nla_l_thresh);
2896 }
2897
2898 return 0;
2899}
2900
2901static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
2902 const char *mcast_ifn, __be32 syncid)
2903{
2904 struct nlattr *nl_daemon;
2905
2906 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
2907 if (!nl_daemon)
2908 return -EMSGSIZE;
2909
2910 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
2911 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
2912 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
2913
2914 nla_nest_end(skb, nl_daemon);
2915
2916 return 0;
2917
2918nla_put_failure:
2919 nla_nest_cancel(skb, nl_daemon);
2920 return -EMSGSIZE;
2921}
2922
2923static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
2924 const char *mcast_ifn, __be32 syncid,
2925 struct netlink_callback *cb)
2926{
2927 void *hdr;
2928 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2929 &ip_vs_genl_family, NLM_F_MULTI,
2930 IPVS_CMD_NEW_DAEMON);
2931 if (!hdr)
2932 return -EMSGSIZE;
2933
2934 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
2935 goto nla_put_failure;
2936
2937 return genlmsg_end(skb, hdr);
2938
2939nla_put_failure:
2940 genlmsg_cancel(skb, hdr);
2941 return -EMSGSIZE;
2942}
2943
2944static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
2945 struct netlink_callback *cb)
2946{
2947 mutex_lock(&__ip_vs_mutex);
2948 if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
2949 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
2950 ip_vs_master_mcast_ifn,
2951 ip_vs_master_syncid, cb) < 0)
2952 goto nla_put_failure;
2953
2954 cb->args[0] = 1;
2955 }
2956
2957 if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
2958 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
2959 ip_vs_backup_mcast_ifn,
2960 ip_vs_backup_syncid, cb) < 0)
2961 goto nla_put_failure;
2962
2963 cb->args[1] = 1;
2964 }
2965
2966nla_put_failure:
2967 mutex_unlock(&__ip_vs_mutex);
2968
2969 return skb->len;
2970}
2971
2972static int ip_vs_genl_new_daemon(struct nlattr **attrs)
2973{
2974 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
2975 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
2976 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
2977 return -EINVAL;
2978
2979 return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
2980 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
2981 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
2982}
2983
2984static int ip_vs_genl_del_daemon(struct nlattr **attrs)
2985{
2986 if (!attrs[IPVS_DAEMON_ATTR_STATE])
2987 return -EINVAL;
2988
2989 return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
2990}
2991
2992static int ip_vs_genl_set_config(struct nlattr **attrs)
2993{
2994 struct ip_vs_timeout_user t;
2995
2996 __ip_vs_get_timeouts(&t);
2997
2998 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
2999 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3000
3001 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3002 t.tcp_fin_timeout =
3003 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3004
3005 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3006 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3007
3008 return ip_vs_set_timeout(&t);
3009}
3010
3011static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3012{
3013 struct ip_vs_service *svc = NULL;
c860c6b1
JV
3014 struct ip_vs_service_user_kern usvc;
3015 struct ip_vs_dest_user_kern udest;
9a812198
JV
3016 int ret = 0, cmd;
3017 int need_full_svc = 0, need_full_dest = 0;
3018
3019 cmd = info->genlhdr->cmd;
3020
3021 mutex_lock(&__ip_vs_mutex);
3022
3023 if (cmd == IPVS_CMD_FLUSH) {
3024 ret = ip_vs_flush();
3025 goto out;
3026 } else if (cmd == IPVS_CMD_SET_CONFIG) {
3027 ret = ip_vs_genl_set_config(info->attrs);
3028 goto out;
3029 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3030 cmd == IPVS_CMD_DEL_DAEMON) {
3031
3032 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3033
3034 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3035 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3036 info->attrs[IPVS_CMD_ATTR_DAEMON],
3037 ip_vs_daemon_policy)) {
3038 ret = -EINVAL;
3039 goto out;
3040 }
3041
3042 if (cmd == IPVS_CMD_NEW_DAEMON)
3043 ret = ip_vs_genl_new_daemon(daemon_attrs);
3044 else
3045 ret = ip_vs_genl_del_daemon(daemon_attrs);
3046 goto out;
3047 } else if (cmd == IPVS_CMD_ZERO &&
3048 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3049 ret = ip_vs_zero_all();
3050 goto out;
3051 }
3052
3053 /* All following commands require a service argument, so check if we
3054 * received a valid one. We need a full service specification when
3055 * adding / editing a service. Only identifying members otherwise. */
3056 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3057 need_full_svc = 1;
3058
3059 ret = ip_vs_genl_parse_service(&usvc,
3060 info->attrs[IPVS_CMD_ATTR_SERVICE],
3061 need_full_svc);
3062 if (ret)
3063 goto out;
3064
3065 /* Lookup the exact service by <protocol, addr, port> or fwmark */
3066 if (usvc.fwmark == 0)
b18610de
JV
3067 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
3068 &usvc.addr, usvc.port);
9a812198 3069 else
b18610de 3070 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
9a812198
JV
3071
3072 /* Unless we're adding a new service, the service must already exist */
3073 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3074 ret = -ESRCH;
3075 goto out;
3076 }
3077
3078 /* Destination commands require a valid destination argument. For
3079 * adding / editing a destination, we need a full destination
3080 * specification. */
3081 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3082 cmd == IPVS_CMD_DEL_DEST) {
3083 if (cmd != IPVS_CMD_DEL_DEST)
3084 need_full_dest = 1;
3085
3086 ret = ip_vs_genl_parse_dest(&udest,
3087 info->attrs[IPVS_CMD_ATTR_DEST],
3088 need_full_dest);
3089 if (ret)
3090 goto out;
3091 }
3092
3093 switch (cmd) {
3094 case IPVS_CMD_NEW_SERVICE:
3095 if (svc == NULL)
3096 ret = ip_vs_add_service(&usvc, &svc);
3097 else
3098 ret = -EEXIST;
3099 break;
3100 case IPVS_CMD_SET_SERVICE:
3101 ret = ip_vs_edit_service(svc, &usvc);
3102 break;
3103 case IPVS_CMD_DEL_SERVICE:
3104 ret = ip_vs_del_service(svc);
3105 break;
3106 case IPVS_CMD_NEW_DEST:
3107 ret = ip_vs_add_dest(svc, &udest);
3108 break;
3109 case IPVS_CMD_SET_DEST:
3110 ret = ip_vs_edit_dest(svc, &udest);
3111 break;
3112 case IPVS_CMD_DEL_DEST:
3113 ret = ip_vs_del_dest(svc, &udest);
3114 break;
3115 case IPVS_CMD_ZERO:
3116 ret = ip_vs_zero_service(svc);
3117 break;
3118 default:
3119 ret = -EINVAL;
3120 }
3121
3122out:
3123 if (svc)
3124 ip_vs_service_put(svc);
3125 mutex_unlock(&__ip_vs_mutex);
3126
3127 return ret;
3128}
3129
3130static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3131{
3132 struct sk_buff *msg;
3133 void *reply;
3134 int ret, cmd, reply_cmd;
3135
3136 cmd = info->genlhdr->cmd;
3137
3138 if (cmd == IPVS_CMD_GET_SERVICE)
3139 reply_cmd = IPVS_CMD_NEW_SERVICE;
3140 else if (cmd == IPVS_CMD_GET_INFO)
3141 reply_cmd = IPVS_CMD_SET_INFO;
3142 else if (cmd == IPVS_CMD_GET_CONFIG)
3143 reply_cmd = IPVS_CMD_SET_CONFIG;
3144 else {
3145 IP_VS_ERR("unknown Generic Netlink command\n");
3146 return -EINVAL;
3147 }
3148
3149 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3150 if (!msg)
3151 return -ENOMEM;
3152
3153 mutex_lock(&__ip_vs_mutex);
3154
3155 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3156 if (reply == NULL)
3157 goto nla_put_failure;
3158
3159 switch (cmd) {
3160 case IPVS_CMD_GET_SERVICE:
3161 {
3162 struct ip_vs_service *svc;
3163
3164 svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
3165 if (IS_ERR(svc)) {
3166 ret = PTR_ERR(svc);
3167 goto out_err;
3168 } else if (svc) {
3169 ret = ip_vs_genl_fill_service(msg, svc);
3170 ip_vs_service_put(svc);
3171 if (ret)
3172 goto nla_put_failure;
3173 } else {
3174 ret = -ESRCH;
3175 goto out_err;
3176 }
3177
3178 break;
3179 }
3180
3181 case IPVS_CMD_GET_CONFIG:
3182 {
3183 struct ip_vs_timeout_user t;
3184
3185 __ip_vs_get_timeouts(&t);
3186#ifdef CONFIG_IP_VS_PROTO_TCP
3187 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3188 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3189 t.tcp_fin_timeout);
3190#endif
3191#ifdef CONFIG_IP_VS_PROTO_UDP
3192 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3193#endif
3194
3195 break;
3196 }
3197
3198 case IPVS_CMD_GET_INFO:
3199 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3200 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3201 IP_VS_CONN_TAB_SIZE);
3202 break;
3203 }
3204
3205 genlmsg_end(msg, reply);
3206 ret = genlmsg_unicast(msg, info->snd_pid);
3207 goto out;
3208
3209nla_put_failure:
3210 IP_VS_ERR("not enough space in Netlink message\n");
3211 ret = -EMSGSIZE;
3212
3213out_err:
3214 nlmsg_free(msg);
3215out:
3216 mutex_unlock(&__ip_vs_mutex);
3217
3218 return ret;
3219}
3220
3221
3222static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3223 {
3224 .cmd = IPVS_CMD_NEW_SERVICE,
3225 .flags = GENL_ADMIN_PERM,
3226 .policy = ip_vs_cmd_policy,
3227 .doit = ip_vs_genl_set_cmd,
3228 },
3229 {
3230 .cmd = IPVS_CMD_SET_SERVICE,
3231 .flags = GENL_ADMIN_PERM,
3232 .policy = ip_vs_cmd_policy,
3233 .doit = ip_vs_genl_set_cmd,
3234 },
3235 {
3236 .cmd = IPVS_CMD_DEL_SERVICE,
3237 .flags = GENL_ADMIN_PERM,
3238 .policy = ip_vs_cmd_policy,
3239 .doit = ip_vs_genl_set_cmd,
3240 },
3241 {
3242 .cmd = IPVS_CMD_GET_SERVICE,
3243 .flags = GENL_ADMIN_PERM,
3244 .doit = ip_vs_genl_get_cmd,
3245 .dumpit = ip_vs_genl_dump_services,
3246 .policy = ip_vs_cmd_policy,
3247 },
3248 {
3249 .cmd = IPVS_CMD_NEW_DEST,
3250 .flags = GENL_ADMIN_PERM,
3251 .policy = ip_vs_cmd_policy,
3252 .doit = ip_vs_genl_set_cmd,
3253 },
3254 {
3255 .cmd = IPVS_CMD_SET_DEST,
3256 .flags = GENL_ADMIN_PERM,
3257 .policy = ip_vs_cmd_policy,
3258 .doit = ip_vs_genl_set_cmd,
3259 },
3260 {
3261 .cmd = IPVS_CMD_DEL_DEST,
3262 .flags = GENL_ADMIN_PERM,
3263 .policy = ip_vs_cmd_policy,
3264 .doit = ip_vs_genl_set_cmd,
3265 },
3266 {
3267 .cmd = IPVS_CMD_GET_DEST,
3268 .flags = GENL_ADMIN_PERM,
3269 .policy = ip_vs_cmd_policy,
3270 .dumpit = ip_vs_genl_dump_dests,
3271 },
3272 {
3273 .cmd = IPVS_CMD_NEW_DAEMON,
3274 .flags = GENL_ADMIN_PERM,
3275 .policy = ip_vs_cmd_policy,
3276 .doit = ip_vs_genl_set_cmd,
3277 },
3278 {
3279 .cmd = IPVS_CMD_DEL_DAEMON,
3280 .flags = GENL_ADMIN_PERM,
3281 .policy = ip_vs_cmd_policy,
3282 .doit = ip_vs_genl_set_cmd,
3283 },
3284 {
3285 .cmd = IPVS_CMD_GET_DAEMON,
3286 .flags = GENL_ADMIN_PERM,
3287 .dumpit = ip_vs_genl_dump_daemons,
3288 },
3289 {
3290 .cmd = IPVS_CMD_SET_CONFIG,
3291 .flags = GENL_ADMIN_PERM,
3292 .policy = ip_vs_cmd_policy,
3293 .doit = ip_vs_genl_set_cmd,
3294 },
3295 {
3296 .cmd = IPVS_CMD_GET_CONFIG,
3297 .flags = GENL_ADMIN_PERM,
3298 .doit = ip_vs_genl_get_cmd,
3299 },
3300 {
3301 .cmd = IPVS_CMD_GET_INFO,
3302 .flags = GENL_ADMIN_PERM,
3303 .doit = ip_vs_genl_get_cmd,
3304 },
3305 {
3306 .cmd = IPVS_CMD_ZERO,
3307 .flags = GENL_ADMIN_PERM,
3308 .policy = ip_vs_cmd_policy,
3309 .doit = ip_vs_genl_set_cmd,
3310 },
3311 {
3312 .cmd = IPVS_CMD_FLUSH,
3313 .flags = GENL_ADMIN_PERM,
3314 .doit = ip_vs_genl_set_cmd,
3315 },
3316};
3317
3318static int __init ip_vs_genl_register(void)
3319{
3320 int ret, i;
3321
3322 ret = genl_register_family(&ip_vs_genl_family);
3323 if (ret)
3324 return ret;
3325
3326 for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
3327 ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
3328 if (ret)
3329 goto err_out;
3330 }
3331 return 0;
3332
3333err_out:
3334 genl_unregister_family(&ip_vs_genl_family);
3335 return ret;
3336}
3337
3338static void ip_vs_genl_unregister(void)
3339{
3340 genl_unregister_family(&ip_vs_genl_family);
3341}
3342
3343/* End of Generic Netlink interface definitions */
3344
1da177e4 3345
048cf48b 3346int __init ip_vs_control_init(void)
1da177e4
LT
3347{
3348 int ret;
3349 int idx;
3350
3351 EnterFunction(2);
3352
3353 ret = nf_register_sockopt(&ip_vs_sockopts);
3354 if (ret) {
3355 IP_VS_ERR("cannot register sockopt.\n");
3356 return ret;
3357 }
3358
9a812198
JV
3359 ret = ip_vs_genl_register();
3360 if (ret) {
3361 IP_VS_ERR("cannot register Generic Netlink interface.\n");
3362 nf_unregister_sockopt(&ip_vs_sockopts);
3363 return ret;
3364 }
3365
457c4cbc
EB
3366 proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
3367 proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
1da177e4 3368
90754f8e 3369 sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
1da177e4
LT
3370
3371 /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3372 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3373 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3374 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3375 }
3376 for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) {
3377 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3378 }
3379
1da177e4
LT
3380 ip_vs_new_estimator(&ip_vs_stats);
3381
3382 /* Hook the defense timer */
3383 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3384
3385 LeaveFunction(2);
3386 return 0;
3387}
3388
3389
3390void ip_vs_control_cleanup(void)
3391{
3392 EnterFunction(2);
3393 ip_vs_trash_cleanup();
3394 cancel_rearming_delayed_work(&defense_work);
28e53bdd 3395 cancel_work_sync(&defense_work.work);
1da177e4
LT
3396 ip_vs_kill_estimator(&ip_vs_stats);
3397 unregister_sysctl_table(sysctl_header);
457c4cbc
EB
3398 proc_net_remove(&init_net, "ip_vs_stats");
3399 proc_net_remove(&init_net, "ip_vs");
9a812198 3400 ip_vs_genl_unregister();
1da177e4
LT
3401 nf_unregister_sockopt(&ip_vs_sockopts);
3402 LeaveFunction(2);
3403}