]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/ipvs/ip_vs_ctl.c
ipvs: Restrict connection table size via Kconfig
[net-next-2.6.git] / net / ipv4 / ipvs / ip_vs_ctl.c
CommitLineData
1da177e4
LT
1/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
1da177e4
LT
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
21#include <linux/module.h>
22#include <linux/init.h>
23#include <linux/types.h>
4fc268d2 24#include <linux/capability.h>
1da177e4
LT
25#include <linux/fs.h>
26#include <linux/sysctl.h>
27#include <linux/proc_fs.h>
28#include <linux/workqueue.h>
29#include <linux/swap.h>
1da177e4
LT
30#include <linux/seq_file.h>
31
32#include <linux/netfilter.h>
33#include <linux/netfilter_ipv4.h>
14cc3e2b 34#include <linux/mutex.h>
1da177e4 35
457c4cbc 36#include <net/net_namespace.h>
1da177e4 37#include <net/ip.h>
09571c7a
VB
38#ifdef CONFIG_IP_VS_IPV6
39#include <net/ipv6.h>
40#include <net/ip6_route.h>
41#endif
14c85021 42#include <net/route.h>
1da177e4 43#include <net/sock.h>
9a812198 44#include <net/genetlink.h>
1da177e4
LT
45
46#include <asm/uaccess.h>
47
48#include <net/ip_vs.h>
49
50/* mutex serializing IPVS sockopts; a mutex is used because [gs]etsockopt may sleep. */
14cc3e2b 51static DEFINE_MUTEX(__ip_vs_mutex);
1da177e4
LT
52
53/* lock for service table */
54static DEFINE_RWLOCK(__ip_vs_svc_lock);
55
56/* lock for table with the real services */
57static DEFINE_RWLOCK(__ip_vs_rs_lock);
58
59/* lock for state and timeout tables */
60static DEFINE_RWLOCK(__ip_vs_securetcp_lock);
61
62/* lock for drop entry handling */
63static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
64
65/* lock for drop packet handling */
66static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
67
68/* 1/rate drop and drop-entry variables */
69int ip_vs_drop_rate = 0;
70int ip_vs_drop_counter = 0;
71static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
72
73/* number of virtual services */
74static int ip_vs_num_services = 0;
75
76/* sysctl variables */
77static int sysctl_ip_vs_drop_entry = 0;
78static int sysctl_ip_vs_drop_packet = 0;
79static int sysctl_ip_vs_secure_tcp = 0;
80static int sysctl_ip_vs_amemthresh = 1024;
81static int sysctl_ip_vs_am_droprate = 10;
82int sysctl_ip_vs_cache_bypass = 0;
83int sysctl_ip_vs_expire_nodest_conn = 0;
84int sysctl_ip_vs_expire_quiescent_template = 0;
85int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
86int sysctl_ip_vs_nat_icmp_send = 0;
87
88
89#ifdef CONFIG_IP_VS_DEBUG
90static int sysctl_ip_vs_debug_level = 0;
91
92int ip_vs_get_debug_level(void)
93{
94 return sysctl_ip_vs_debug_level;
95}
96#endif
97
09571c7a
VB
98#ifdef CONFIG_IP_VS_IPV6
99/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
100static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
101{
102 struct rt6_info *rt;
103 struct flowi fl = {
104 .oif = 0,
105 .nl_u = {
106 .ip6_u = {
107 .daddr = *addr,
108 .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
109 };
110
111 rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
112 if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
113 return 1;
114
115 return 0;
116}
117#endif
1da177e4 118/*
af9debd4
JA
119 * update_defense_level is called from keventd and from sysctl,
120 * so it needs to protect itself from softirqs
1da177e4
LT
121 */
122static void update_defense_level(void)
123{
124 struct sysinfo i;
125 static int old_secure_tcp = 0;
126 int availmem;
127 int nomem;
128 int to_change = -1;
129
130 /* we only count free and buffered memory (in pages) */
131 si_meminfo(&i);
132 availmem = i.freeram + i.bufferram;
133 /* however in linux 2.5 the i.bufferram is total page cache size,
134 we need adjust it */
135 /* si_swapinfo(&i); */
136 /* availmem = availmem - (i.totalswap - i.freeswap); */
137
138 nomem = (availmem < sysctl_ip_vs_amemthresh);
139
af9debd4
JA
140 local_bh_disable();
141
1da177e4
LT
142 /* drop_entry */
143 spin_lock(&__ip_vs_dropentry_lock);
144 switch (sysctl_ip_vs_drop_entry) {
145 case 0:
146 atomic_set(&ip_vs_dropentry, 0);
147 break;
148 case 1:
149 if (nomem) {
150 atomic_set(&ip_vs_dropentry, 1);
151 sysctl_ip_vs_drop_entry = 2;
152 } else {
153 atomic_set(&ip_vs_dropentry, 0);
154 }
155 break;
156 case 2:
157 if (nomem) {
158 atomic_set(&ip_vs_dropentry, 1);
159 } else {
160 atomic_set(&ip_vs_dropentry, 0);
161 sysctl_ip_vs_drop_entry = 1;
162 };
163 break;
164 case 3:
165 atomic_set(&ip_vs_dropentry, 1);
166 break;
167 }
168 spin_unlock(&__ip_vs_dropentry_lock);
169
170 /* drop_packet */
171 spin_lock(&__ip_vs_droppacket_lock);
172 switch (sysctl_ip_vs_drop_packet) {
173 case 0:
174 ip_vs_drop_rate = 0;
175 break;
176 case 1:
177 if (nomem) {
178 ip_vs_drop_rate = ip_vs_drop_counter
179 = sysctl_ip_vs_amemthresh /
180 (sysctl_ip_vs_amemthresh-availmem);
181 sysctl_ip_vs_drop_packet = 2;
182 } else {
183 ip_vs_drop_rate = 0;
184 }
185 break;
186 case 2:
187 if (nomem) {
188 ip_vs_drop_rate = ip_vs_drop_counter
189 = sysctl_ip_vs_amemthresh /
190 (sysctl_ip_vs_amemthresh-availmem);
191 } else {
192 ip_vs_drop_rate = 0;
193 sysctl_ip_vs_drop_packet = 1;
194 }
195 break;
196 case 3:
197 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
198 break;
199 }
200 spin_unlock(&__ip_vs_droppacket_lock);
201
202 /* secure_tcp */
203 write_lock(&__ip_vs_securetcp_lock);
204 switch (sysctl_ip_vs_secure_tcp) {
205 case 0:
206 if (old_secure_tcp >= 2)
207 to_change = 0;
208 break;
209 case 1:
210 if (nomem) {
211 if (old_secure_tcp < 2)
212 to_change = 1;
213 sysctl_ip_vs_secure_tcp = 2;
214 } else {
215 if (old_secure_tcp >= 2)
216 to_change = 0;
217 }
218 break;
219 case 2:
220 if (nomem) {
221 if (old_secure_tcp < 2)
222 to_change = 1;
223 } else {
224 if (old_secure_tcp >= 2)
225 to_change = 0;
226 sysctl_ip_vs_secure_tcp = 1;
227 }
228 break;
229 case 3:
230 if (old_secure_tcp < 2)
231 to_change = 1;
232 break;
233 }
234 old_secure_tcp = sysctl_ip_vs_secure_tcp;
235 if (to_change >= 0)
236 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
237 write_unlock(&__ip_vs_securetcp_lock);
af9debd4
JA
238
239 local_bh_enable();
1da177e4
LT
240}
241
242
243/*
244 * Timer for checking the defense
245 */
246#define DEFENSE_TIMER_PERIOD 1*HZ
c4028958
DH
247static void defense_work_handler(struct work_struct *work);
248static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
1da177e4 249
c4028958 250static void defense_work_handler(struct work_struct *work)
1da177e4
LT
251{
252 update_defense_level();
253 if (atomic_read(&ip_vs_dropentry))
254 ip_vs_random_dropentry();
255
256 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
257}
258
259int
260ip_vs_use_count_inc(void)
261{
262 return try_module_get(THIS_MODULE);
263}
264
265void
266ip_vs_use_count_dec(void)
267{
268 module_put(THIS_MODULE);
269}
270
271
272/*
273 * Hash table: for virtual service lookups
274 */
275#define IP_VS_SVC_TAB_BITS 8
276#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
277#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
278
279/* the service table hashed by <protocol, addr, port> */
280static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
281/* the service table hashed by fwmark */
282static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
283
284/*
285 * Hash table: for real service lookups
286 */
287#define IP_VS_RTAB_BITS 4
288#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
289#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
290
291static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
292
293/*
294 * Trash for destinations
295 */
296static LIST_HEAD(ip_vs_dest_trash);
297
298/*
299 * FTP & NULL virtual service counters
300 */
301static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
302static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
303
304
305/*
306 * Returns hash value for virtual service
307 */
308static __inline__ unsigned
b18610de
JV
309ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
310 __be16 port)
1da177e4
LT
311{
312 register unsigned porth = ntohs(port);
b18610de 313 __be32 addr_fold = addr->ip;
1da177e4 314
b18610de
JV
315#ifdef CONFIG_IP_VS_IPV6
316 if (af == AF_INET6)
317 addr_fold = addr->ip6[0]^addr->ip6[1]^
318 addr->ip6[2]^addr->ip6[3];
319#endif
320
321 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
1da177e4
LT
322 & IP_VS_SVC_TAB_MASK;
323}
324
325/*
326 * Returns hash value of fwmark for virtual service lookup
327 */
328static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
329{
330 return fwmark & IP_VS_SVC_TAB_MASK;
331}
332
333/*
334 * Hashes a service in the ip_vs_svc_table by <proto,addr,port>
335 * or in the ip_vs_svc_fwm_table by fwmark.
336 * Should be called with locked tables.
337 */
338static int ip_vs_svc_hash(struct ip_vs_service *svc)
339{
340 unsigned hash;
341
342 if (svc->flags & IP_VS_SVC_F_HASHED) {
343 IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
344 "called from %p\n", __builtin_return_address(0));
345 return 0;
346 }
347
348 if (svc->fwmark == 0) {
349 /*
350 * Hash it by <protocol,addr,port> in ip_vs_svc_table
351 */
b18610de 352 hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
e7ade46a 353 svc->port);
1da177e4
LT
354 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
355 } else {
356 /*
357 * Hash it by fwmark in ip_vs_svc_fwm_table
358 */
359 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
360 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
361 }
362
363 svc->flags |= IP_VS_SVC_F_HASHED;
364 /* increase its refcnt because it is referenced by the svc table */
365 atomic_inc(&svc->refcnt);
366 return 1;
367}
368
369
370/*
371 * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
372 * Should be called with locked tables.
373 */
374static int ip_vs_svc_unhash(struct ip_vs_service *svc)
375{
376 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
377 IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
378 "called from %p\n", __builtin_return_address(0));
379 return 0;
380 }
381
382 if (svc->fwmark == 0) {
383 /* Remove it from the ip_vs_svc_table table */
384 list_del(&svc->s_list);
385 } else {
386 /* Remove it from the ip_vs_svc_fwm_table table */
387 list_del(&svc->f_list);
388 }
389
390 svc->flags &= ~IP_VS_SVC_F_HASHED;
391 atomic_dec(&svc->refcnt);
392 return 1;
393}
394
395
396/*
397 * Get service by {proto,addr,port} in the service table.
398 */
b18610de
JV
399static inline struct ip_vs_service *
400__ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
401 __be16 vport)
1da177e4
LT
402{
403 unsigned hash;
404 struct ip_vs_service *svc;
405
406 /* Check for "full" addressed entries */
b18610de 407 hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
1da177e4
LT
408
409 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
b18610de
JV
410 if ((svc->af == af)
411 && ip_vs_addr_equal(af, &svc->addr, vaddr)
1da177e4
LT
412 && (svc->port == vport)
413 && (svc->protocol == protocol)) {
414 /* HIT */
415 atomic_inc(&svc->usecnt);
416 return svc;
417 }
418 }
419
420 return NULL;
421}
422
423
424/*
425 * Get service by {fwmark} in the service table.
426 */
b18610de
JV
427static inline struct ip_vs_service *
428__ip_vs_svc_fwm_get(int af, __u32 fwmark)
1da177e4
LT
429{
430 unsigned hash;
431 struct ip_vs_service *svc;
432
433 /* Check for fwmark addressed entries */
434 hash = ip_vs_svc_fwm_hashkey(fwmark);
435
436 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
b18610de 437 if (svc->fwmark == fwmark && svc->af == af) {
1da177e4
LT
438 /* HIT */
439 atomic_inc(&svc->usecnt);
440 return svc;
441 }
442 }
443
444 return NULL;
445}
446
447struct ip_vs_service *
3c2e0505
JV
448ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
449 const union nf_inet_addr *vaddr, __be16 vport)
1da177e4
LT
450{
451 struct ip_vs_service *svc;
3c2e0505 452
1da177e4
LT
453 read_lock(&__ip_vs_svc_lock);
454
455 /*
456 * Check the table hashed by fwmark first
457 */
3c2e0505 458 if (fwmark && (svc = __ip_vs_svc_fwm_get(af, fwmark)))
1da177e4
LT
459 goto out;
460
461 /*
462 * Check the table hashed by <protocol,addr,port>
463 * for "full" addressed entries
464 */
3c2e0505 465 svc = __ip_vs_service_get(af, protocol, vaddr, vport);
1da177e4
LT
466
467 if (svc == NULL
468 && protocol == IPPROTO_TCP
469 && atomic_read(&ip_vs_ftpsvc_counter)
470 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
471 /*
472 * Check if ftp service entry exists, the packet
473 * might belong to FTP data connections.
474 */
3c2e0505 475 svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT);
1da177e4
LT
476 }
477
478 if (svc == NULL
479 && atomic_read(&ip_vs_nullsvc_counter)) {
480 /*
481 * Check if the catch-all port (port zero) exists
482 */
3c2e0505 483 svc = __ip_vs_service_get(af, protocol, vaddr, 0);
1da177e4
LT
484 }
485
486 out:
487 read_unlock(&__ip_vs_svc_lock);
488
3c2e0505
JV
489 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
490 fwmark, ip_vs_proto_name(protocol),
491 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
492 svc ? "hit" : "not hit");
1da177e4
LT
493
494 return svc;
495}
496
497
498static inline void
499__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
500{
501 atomic_inc(&svc->refcnt);
502 dest->svc = svc;
503}
504
505static inline void
506__ip_vs_unbind_svc(struct ip_vs_dest *dest)
507{
508 struct ip_vs_service *svc = dest->svc;
509
510 dest->svc = NULL;
511 if (atomic_dec_and_test(&svc->refcnt))
512 kfree(svc);
513}
514
515
516/*
517 * Returns hash value for real service
518 */
7937df15
JV
519static inline unsigned ip_vs_rs_hashkey(int af,
520 const union nf_inet_addr *addr,
521 __be16 port)
1da177e4
LT
522{
523 register unsigned porth = ntohs(port);
7937df15
JV
524 __be32 addr_fold = addr->ip;
525
526#ifdef CONFIG_IP_VS_IPV6
527 if (af == AF_INET6)
528 addr_fold = addr->ip6[0]^addr->ip6[1]^
529 addr->ip6[2]^addr->ip6[3];
530#endif
1da177e4 531
7937df15 532 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
1da177e4
LT
533 & IP_VS_RTAB_MASK;
534}
535
536/*
537 * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
538 * should be called with locked tables.
539 */
540static int ip_vs_rs_hash(struct ip_vs_dest *dest)
541{
542 unsigned hash;
543
544 if (!list_empty(&dest->d_list)) {
545 return 0;
546 }
547
548 /*
549 * Hash by proto,addr,port,
550 * which are the parameters of the real service.
551 */
7937df15
JV
552 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
553
1da177e4
LT
554 list_add(&dest->d_list, &ip_vs_rtable[hash]);
555
556 return 1;
557}
558
559/*
560 * UNhashes ip_vs_dest from ip_vs_rtable.
561 * should be called with locked tables.
562 */
563static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
564{
565 /*
566 * Remove it from the ip_vs_rtable table.
567 */
568 if (!list_empty(&dest->d_list)) {
569 list_del(&dest->d_list);
570 INIT_LIST_HEAD(&dest->d_list);
571 }
572
573 return 1;
574}
575
576/*
577 * Lookup real service by <proto,addr,port> in the real service table.
578 */
579struct ip_vs_dest *
7937df15
JV
580ip_vs_lookup_real_service(int af, __u16 protocol,
581 const union nf_inet_addr *daddr,
582 __be16 dport)
1da177e4
LT
583{
584 unsigned hash;
585 struct ip_vs_dest *dest;
586
587 /*
588 * Check for "full" addressed entries
589 * Return the first found entry
590 */
7937df15 591 hash = ip_vs_rs_hashkey(af, daddr, dport);
1da177e4
LT
592
593 read_lock(&__ip_vs_rs_lock);
594 list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
7937df15
JV
595 if ((dest->af == af)
596 && ip_vs_addr_equal(af, &dest->addr, daddr)
1da177e4
LT
597 && (dest->port == dport)
598 && ((dest->protocol == protocol) ||
599 dest->vfwmark)) {
600 /* HIT */
601 read_unlock(&__ip_vs_rs_lock);
602 return dest;
603 }
604 }
605 read_unlock(&__ip_vs_rs_lock);
606
607 return NULL;
608}
609
610/*
611 * Lookup destination by {addr,port} in the given service
612 */
613static struct ip_vs_dest *
7937df15
JV
614ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
615 __be16 dport)
1da177e4
LT
616{
617 struct ip_vs_dest *dest;
618
619 /*
620 * Find the destination for the given service
621 */
622 list_for_each_entry(dest, &svc->destinations, n_list) {
7937df15
JV
623 if ((dest->af == svc->af)
624 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
625 && (dest->port == dport)) {
1da177e4
LT
626 /* HIT */
627 return dest;
628 }
629 }
630
631 return NULL;
632}
633
1e356f9c
RB
634/*
635 * Find destination by {daddr,dport,vaddr,protocol}
636 * Cretaed to be used in ip_vs_process_message() in
637 * the backup synchronization daemon. It finds the
638 * destination to be bound to the received connection
639 * on the backup.
640 *
641 * ip_vs_lookup_real_service() looked promissing, but
642 * seems not working as expected.
643 */
7937df15
JV
644struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
645 __be16 dport,
646 const union nf_inet_addr *vaddr,
647 __be16 vport, __u16 protocol)
1e356f9c
RB
648{
649 struct ip_vs_dest *dest;
650 struct ip_vs_service *svc;
651
7937df15 652 svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
1e356f9c
RB
653 if (!svc)
654 return NULL;
655 dest = ip_vs_lookup_dest(svc, daddr, dport);
656 if (dest)
657 atomic_inc(&dest->refcnt);
658 ip_vs_service_put(svc);
659 return dest;
660}
1da177e4
LT
661
662/*
663 * Lookup dest by {svc,addr,port} in the destination trash.
664 * The destination trash is used to hold the destinations that are removed
665 * from the service table but are still referenced by some conn entries.
666 * The reason to add the destination trash is when the dest is temporary
667 * down (either by administrator or by monitor program), the dest can be
668 * picked back from the trash, the remaining connections to the dest can
669 * continue, and the counting information of the dest is also useful for
670 * scheduling.
671 */
672static struct ip_vs_dest *
7937df15
JV
673ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
674 __be16 dport)
1da177e4
LT
675{
676 struct ip_vs_dest *dest, *nxt;
677
678 /*
679 * Find the destination in trash
680 */
681 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
7937df15
JV
682 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
683 "dest->refcnt=%d\n",
684 dest->vfwmark,
685 IP_VS_DBG_ADDR(svc->af, &dest->addr),
686 ntohs(dest->port),
687 atomic_read(&dest->refcnt));
688 if (dest->af == svc->af &&
689 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
1da177e4
LT
690 dest->port == dport &&
691 dest->vfwmark == svc->fwmark &&
692 dest->protocol == svc->protocol &&
693 (svc->fwmark ||
7937df15 694 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
1da177e4
LT
695 dest->vport == svc->port))) {
696 /* HIT */
697 return dest;
698 }
699
700 /*
701 * Try to purge the destination from trash if not referenced
702 */
703 if (atomic_read(&dest->refcnt) == 1) {
7937df15
JV
704 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
705 "from trash\n",
706 dest->vfwmark,
707 IP_VS_DBG_ADDR(svc->af, &dest->addr),
708 ntohs(dest->port));
1da177e4
LT
709 list_del(&dest->n_list);
710 ip_vs_dst_reset(dest);
711 __ip_vs_unbind_svc(dest);
712 kfree(dest);
713 }
714 }
715
716 return NULL;
717}
718
719
720/*
721 * Clean up all the destinations in the trash
722 * Called by the ip_vs_control_cleanup()
723 *
724 * When the ip_vs_control_clearup is activated by ipvs module exit,
725 * the service tables must have been flushed and all the connections
726 * are expired, and the refcnt of each destination in the trash must
727 * be 1, so we simply release them here.
728 */
729static void ip_vs_trash_cleanup(void)
730{
731 struct ip_vs_dest *dest, *nxt;
732
733 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
734 list_del(&dest->n_list);
735 ip_vs_dst_reset(dest);
736 __ip_vs_unbind_svc(dest);
737 kfree(dest);
738 }
739}
740
741
742static void
743ip_vs_zero_stats(struct ip_vs_stats *stats)
744{
745 spin_lock_bh(&stats->lock);
e93615d0
SH
746
747 stats->conns = 0;
748 stats->inpkts = 0;
749 stats->outpkts = 0;
750 stats->inbytes = 0;
751 stats->outbytes = 0;
752
753 stats->cps = 0;
754 stats->inpps = 0;
755 stats->outpps = 0;
756 stats->inbps = 0;
757 stats->outbps = 0;
758
1da177e4 759 ip_vs_zero_estimator(stats);
e93615d0 760
3a14a313 761 spin_unlock_bh(&stats->lock);
1da177e4
LT
762}
763
764/*
765 * Update a destination in the given service
766 */
767static void
768__ip_vs_update_dest(struct ip_vs_service *svc,
c860c6b1 769 struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
770{
771 int conn_flags;
772
773 /* set the weight and the flags */
774 atomic_set(&dest->weight, udest->weight);
775 conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
776
777 /* check if local node and update the flags */
09571c7a
VB
778#ifdef CONFIG_IP_VS_IPV6
779 if (svc->af == AF_INET6) {
780 if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) {
781 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
782 | IP_VS_CONN_F_LOCALNODE;
783 }
784 } else
785#endif
786 if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
787 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
788 | IP_VS_CONN_F_LOCALNODE;
789 }
1da177e4
LT
790
791 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
792 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
793 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
794 } else {
795 /*
796 * Put the real service in ip_vs_rtable if not present.
797 * For now only for NAT!
798 */
799 write_lock_bh(&__ip_vs_rs_lock);
800 ip_vs_rs_hash(dest);
801 write_unlock_bh(&__ip_vs_rs_lock);
802 }
803 atomic_set(&dest->conn_flags, conn_flags);
804
805 /* bind the service */
806 if (!dest->svc) {
807 __ip_vs_bind_svc(dest, svc);
808 } else {
809 if (dest->svc != svc) {
810 __ip_vs_unbind_svc(dest);
811 ip_vs_zero_stats(&dest->stats);
812 __ip_vs_bind_svc(dest, svc);
813 }
814 }
815
816 /* set the dest status flags */
817 dest->flags |= IP_VS_DEST_F_AVAILABLE;
818
819 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
820 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
821 dest->u_threshold = udest->u_threshold;
822 dest->l_threshold = udest->l_threshold;
823}
824
825
826/*
827 * Create a destination for the given service
828 */
829static int
c860c6b1 830ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
1da177e4
LT
831 struct ip_vs_dest **dest_p)
832{
833 struct ip_vs_dest *dest;
834 unsigned atype;
835
836 EnterFunction(2);
837
09571c7a
VB
838#ifdef CONFIG_IP_VS_IPV6
839 if (svc->af == AF_INET6) {
840 atype = ipv6_addr_type(&udest->addr.in6);
3bfb92f4
SW
841 if ((!(atype & IPV6_ADDR_UNICAST) ||
842 atype & IPV6_ADDR_LINKLOCAL) &&
09571c7a
VB
843 !__ip_vs_addr_is_local_v6(&udest->addr.in6))
844 return -EINVAL;
845 } else
846#endif
847 {
848 atype = inet_addr_type(&init_net, udest->addr.ip);
849 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
850 return -EINVAL;
851 }
1da177e4 852
0da974f4 853 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
1da177e4
LT
854 if (dest == NULL) {
855 IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
856 return -ENOMEM;
857 }
1da177e4 858
c860c6b1 859 dest->af = svc->af;
1da177e4 860 dest->protocol = svc->protocol;
c860c6b1 861 dest->vaddr = svc->addr;
1da177e4
LT
862 dest->vport = svc->port;
863 dest->vfwmark = svc->fwmark;
c860c6b1 864 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
1da177e4
LT
865 dest->port = udest->port;
866
867 atomic_set(&dest->activeconns, 0);
868 atomic_set(&dest->inactconns, 0);
869 atomic_set(&dest->persistconns, 0);
870 atomic_set(&dest->refcnt, 0);
871
872 INIT_LIST_HEAD(&dest->d_list);
873 spin_lock_init(&dest->dst_lock);
874 spin_lock_init(&dest->stats.lock);
875 __ip_vs_update_dest(svc, dest, udest);
876 ip_vs_new_estimator(&dest->stats);
877
878 *dest_p = dest;
879
880 LeaveFunction(2);
881 return 0;
882}
883
884
885/*
886 * Add a destination into an existing service
887 */
888static int
c860c6b1 889ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
890{
891 struct ip_vs_dest *dest;
c860c6b1 892 union nf_inet_addr daddr;
014d730d 893 __be16 dport = udest->port;
1da177e4
LT
894 int ret;
895
896 EnterFunction(2);
897
898 if (udest->weight < 0) {
899 IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
900 return -ERANGE;
901 }
902
903 if (udest->l_threshold > udest->u_threshold) {
904 IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than "
905 "upper threshold\n");
906 return -ERANGE;
907 }
908
c860c6b1
JV
909 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
910
1da177e4
LT
911 /*
912 * Check if the dest already exists in the list
913 */
7937df15
JV
914 dest = ip_vs_lookup_dest(svc, &daddr, dport);
915
1da177e4
LT
916 if (dest != NULL) {
917 IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
918 return -EEXIST;
919 }
920
921 /*
922 * Check if the dest already exists in the trash and
923 * is from the same service
924 */
7937df15
JV
925 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
926
1da177e4 927 if (dest != NULL) {
cfc78c5a
JV
928 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
929 "dest->refcnt=%d, service %u/%s:%u\n",
930 IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
931 atomic_read(&dest->refcnt),
932 dest->vfwmark,
933 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
934 ntohs(dest->vport));
935
1da177e4
LT
936 __ip_vs_update_dest(svc, dest, udest);
937
938 /*
939 * Get the destination from the trash
940 */
941 list_del(&dest->n_list);
942
943 ip_vs_new_estimator(&dest->stats);
944
945 write_lock_bh(&__ip_vs_svc_lock);
946
947 /*
948 * Wait until all other svc users go away.
949 */
950 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
951
952 list_add(&dest->n_list, &svc->destinations);
953 svc->num_dests++;
954
955 /* call the update_service function of its scheduler */
82dfb6f3
SW
956 if (svc->scheduler->update_service)
957 svc->scheduler->update_service(svc);
1da177e4
LT
958
959 write_unlock_bh(&__ip_vs_svc_lock);
960 return 0;
961 }
962
963 /*
964 * Allocate and initialize the dest structure
965 */
966 ret = ip_vs_new_dest(svc, udest, &dest);
967 if (ret) {
968 return ret;
969 }
970
971 /*
972 * Add the dest entry into the list
973 */
974 atomic_inc(&dest->refcnt);
975
976 write_lock_bh(&__ip_vs_svc_lock);
977
978 /*
979 * Wait until all other svc users go away.
980 */
981 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
982
983 list_add(&dest->n_list, &svc->destinations);
984 svc->num_dests++;
985
986 /* call the update_service function of its scheduler */
82dfb6f3
SW
987 if (svc->scheduler->update_service)
988 svc->scheduler->update_service(svc);
1da177e4
LT
989
990 write_unlock_bh(&__ip_vs_svc_lock);
991
992 LeaveFunction(2);
993
994 return 0;
995}
996
997
998/*
999 * Edit a destination in the given service
1000 */
1001static int
c860c6b1 1002ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
1003{
1004 struct ip_vs_dest *dest;
c860c6b1 1005 union nf_inet_addr daddr;
014d730d 1006 __be16 dport = udest->port;
1da177e4
LT
1007
1008 EnterFunction(2);
1009
1010 if (udest->weight < 0) {
1011 IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
1012 return -ERANGE;
1013 }
1014
1015 if (udest->l_threshold > udest->u_threshold) {
1016 IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than "
1017 "upper threshold\n");
1018 return -ERANGE;
1019 }
1020
c860c6b1
JV
1021 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
1022
1da177e4
LT
1023 /*
1024 * Lookup the destination list
1025 */
7937df15
JV
1026 dest = ip_vs_lookup_dest(svc, &daddr, dport);
1027
1da177e4
LT
1028 if (dest == NULL) {
1029 IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
1030 return -ENOENT;
1031 }
1032
1033 __ip_vs_update_dest(svc, dest, udest);
1034
1035 write_lock_bh(&__ip_vs_svc_lock);
1036
1037 /* Wait until all other svc users go away */
cae7ca3d 1038 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1da177e4
LT
1039
1040 /* call the update_service, because server weight may be changed */
82dfb6f3
SW
1041 if (svc->scheduler->update_service)
1042 svc->scheduler->update_service(svc);
1da177e4
LT
1043
1044 write_unlock_bh(&__ip_vs_svc_lock);
1045
1046 LeaveFunction(2);
1047
1048 return 0;
1049}
1050
1051
1052/*
1053 * Delete a destination (must be already unlinked from the service)
1054 */
1055static void __ip_vs_del_dest(struct ip_vs_dest *dest)
1056{
1057 ip_vs_kill_estimator(&dest->stats);
1058
1059 /*
1060 * Remove it from the d-linked list with the real services.
1061 */
1062 write_lock_bh(&__ip_vs_rs_lock);
1063 ip_vs_rs_unhash(dest);
1064 write_unlock_bh(&__ip_vs_rs_lock);
1065
1066 /*
1067 * Decrease the refcnt of the dest, and free the dest
1068 * if nobody refers to it (refcnt=0). Otherwise, throw
1069 * the destination into the trash.
1070 */
1071 if (atomic_dec_and_test(&dest->refcnt)) {
1072 ip_vs_dst_reset(dest);
1073 /* simply decrease svc->refcnt here, let the caller check
1074 and release the service if nobody refers to it.
1075 Only user context can release destination and service,
1076 and only one user context can update virtual service at a
1077 time, so the operation here is OK */
1078 atomic_dec(&dest->svc->refcnt);
1079 kfree(dest);
1080 } else {
cfc78c5a
JV
1081 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1082 "dest->refcnt=%d\n",
1083 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1084 ntohs(dest->port),
1085 atomic_read(&dest->refcnt));
1da177e4
LT
1086 list_add(&dest->n_list, &ip_vs_dest_trash);
1087 atomic_inc(&dest->refcnt);
1088 }
1089}
1090
1091
1092/*
1093 * Unlink a destination from the given service
1094 */
1095static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1096 struct ip_vs_dest *dest,
1097 int svcupd)
1098{
1099 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1100
1101 /*
1102 * Remove it from the d-linked destination list.
1103 */
1104 list_del(&dest->n_list);
1105 svc->num_dests--;
82dfb6f3
SW
1106
1107 /*
1108 * Call the update_service function of its scheduler
1109 */
1110 if (svcupd && svc->scheduler->update_service)
1111 svc->scheduler->update_service(svc);
1da177e4
LT
1112}
1113
1114
1115/*
1116 * Delete a destination server in the given service
1117 */
1118static int
c860c6b1 1119ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
1120{
1121 struct ip_vs_dest *dest;
014d730d 1122 __be16 dport = udest->port;
1da177e4
LT
1123
1124 EnterFunction(2);
1125
7937df15 1126 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
c860c6b1 1127
1da177e4
LT
1128 if (dest == NULL) {
1129 IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
1130 return -ENOENT;
1131 }
1132
1133 write_lock_bh(&__ip_vs_svc_lock);
1134
1135 /*
1136 * Wait until all other svc users go away.
1137 */
1138 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1139
1140 /*
1141 * Unlink dest from the service
1142 */
1143 __ip_vs_unlink_dest(svc, dest, 1);
1144
1145 write_unlock_bh(&__ip_vs_svc_lock);
1146
1147 /*
1148 * Delete the destination
1149 */
1150 __ip_vs_del_dest(dest);
1151
1152 LeaveFunction(2);
1153
1154 return 0;
1155}
1156
1157
1158/*
1159 * Add a service into the service hash table
1160 */
1161static int
c860c6b1
JV
1162ip_vs_add_service(struct ip_vs_service_user_kern *u,
1163 struct ip_vs_service **svc_p)
1da177e4
LT
1164{
1165 int ret = 0;
1166 struct ip_vs_scheduler *sched = NULL;
1167 struct ip_vs_service *svc = NULL;
1168
1169 /* increase the module use count */
1170 ip_vs_use_count_inc();
1171
1172 /* Lookup the scheduler by 'u->sched_name' */
1173 sched = ip_vs_scheduler_get(u->sched_name);
1174 if (sched == NULL) {
1175 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1176 u->sched_name);
1177 ret = -ENOENT;
1178 goto out_mod_dec;
1179 }
1180
f94fd041
JV
1181#ifdef CONFIG_IP_VS_IPV6
1182 if (u->af == AF_INET6) {
1183 if (!sched->supports_ipv6) {
1184 ret = -EAFNOSUPPORT;
1185 goto out_err;
1186 }
1187 if ((u->netmask < 1) || (u->netmask > 128)) {
1188 ret = -EINVAL;
1189 goto out_err;
1190 }
1191 }
1192#endif
1193
0da974f4 1194 svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
1da177e4
LT
1195 if (svc == NULL) {
1196 IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
1197 ret = -ENOMEM;
1198 goto out_err;
1199 }
1da177e4
LT
1200
1201 /* I'm the first user of the service */
1202 atomic_set(&svc->usecnt, 1);
1203 atomic_set(&svc->refcnt, 0);
1204
c860c6b1 1205 svc->af = u->af;
1da177e4 1206 svc->protocol = u->protocol;
c860c6b1 1207 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1da177e4
LT
1208 svc->port = u->port;
1209 svc->fwmark = u->fwmark;
1210 svc->flags = u->flags;
1211 svc->timeout = u->timeout * HZ;
1212 svc->netmask = u->netmask;
1213
1214 INIT_LIST_HEAD(&svc->destinations);
1215 rwlock_init(&svc->sched_lock);
1216 spin_lock_init(&svc->stats.lock);
1217
1218 /* Bind the scheduler */
1219 ret = ip_vs_bind_scheduler(svc, sched);
1220 if (ret)
1221 goto out_err;
1222 sched = NULL;
1223
1224 /* Update the virtual service counters */
1225 if (svc->port == FTPPORT)
1226 atomic_inc(&ip_vs_ftpsvc_counter);
1227 else if (svc->port == 0)
1228 atomic_inc(&ip_vs_nullsvc_counter);
1229
1230 ip_vs_new_estimator(&svc->stats);
f94fd041
JV
1231
1232 /* Count only IPv4 services for old get/setsockopt interface */
1233 if (svc->af == AF_INET)
1234 ip_vs_num_services++;
1da177e4
LT
1235
1236 /* Hash the service into the service table */
1237 write_lock_bh(&__ip_vs_svc_lock);
1238 ip_vs_svc_hash(svc);
1239 write_unlock_bh(&__ip_vs_svc_lock);
1240
1241 *svc_p = svc;
1242 return 0;
1243
1244 out_err:
1245 if (svc != NULL) {
1246 if (svc->scheduler)
1247 ip_vs_unbind_scheduler(svc);
1248 if (svc->inc) {
1249 local_bh_disable();
1250 ip_vs_app_inc_put(svc->inc);
1251 local_bh_enable();
1252 }
1253 kfree(svc);
1254 }
1255 ip_vs_scheduler_put(sched);
1256
1257 out_mod_dec:
1258 /* decrease the module use count */
1259 ip_vs_use_count_dec();
1260
1261 return ret;
1262}
1263
1264
1265/*
1266 * Edit a service and bind it with a new scheduler
1267 */
1268static int
c860c6b1 1269ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1da177e4
LT
1270{
1271 struct ip_vs_scheduler *sched, *old_sched;
1272 int ret = 0;
1273
1274 /*
1275 * Lookup the scheduler, by 'u->sched_name'
1276 */
1277 sched = ip_vs_scheduler_get(u->sched_name);
1278 if (sched == NULL) {
1279 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1280 u->sched_name);
1281 return -ENOENT;
1282 }
1283 old_sched = sched;
1284
f94fd041
JV
1285#ifdef CONFIG_IP_VS_IPV6
1286 if (u->af == AF_INET6) {
1287 if (!sched->supports_ipv6) {
a5ba4bf2 1288 ret = -EAFNOSUPPORT;
f94fd041
JV
1289 goto out;
1290 }
1291 if ((u->netmask < 1) || (u->netmask > 128)) {
a5ba4bf2 1292 ret = -EINVAL;
f94fd041
JV
1293 goto out;
1294 }
1295 }
1296#endif
1297
1da177e4
LT
1298 write_lock_bh(&__ip_vs_svc_lock);
1299
1300 /*
1301 * Wait until all other svc users go away.
1302 */
1303 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1304
1305 /*
1306 * Set the flags and timeout value
1307 */
1308 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1309 svc->timeout = u->timeout * HZ;
1310 svc->netmask = u->netmask;
1311
1312 old_sched = svc->scheduler;
1313 if (sched != old_sched) {
1314 /*
1315 * Unbind the old scheduler
1316 */
1317 if ((ret = ip_vs_unbind_scheduler(svc))) {
1318 old_sched = sched;
1319 goto out;
1320 }
1321
1322 /*
1323 * Bind the new scheduler
1324 */
1325 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1326 /*
1327 * If ip_vs_bind_scheduler fails, restore the old
1328 * scheduler.
1329 * The main reason of failure is out of memory.
1330 *
1331 * The question is if the old scheduler can be
1332 * restored all the time. TODO: if it cannot be
1333 * restored some time, we must delete the service,
1334 * otherwise the system may crash.
1335 */
1336 ip_vs_bind_scheduler(svc, old_sched);
1337 old_sched = sched;
1338 goto out;
1339 }
1340 }
1341
1342 out:
1343 write_unlock_bh(&__ip_vs_svc_lock);
1344
1345 if (old_sched)
1346 ip_vs_scheduler_put(old_sched);
1347
1348 return ret;
1349}
1350
1351
1352/*
1353 * Delete a service from the service list
1354 * - The service must be unlinked, unlocked and not referenced!
1355 * - We are called under _bh lock
1356 */
1357static void __ip_vs_del_service(struct ip_vs_service *svc)
1358{
1359 struct ip_vs_dest *dest, *nxt;
1360 struct ip_vs_scheduler *old_sched;
1361
f94fd041
JV
1362 /* Count only IPv4 services for old get/setsockopt interface */
1363 if (svc->af == AF_INET)
1364 ip_vs_num_services--;
1365
1da177e4
LT
1366 ip_vs_kill_estimator(&svc->stats);
1367
1368 /* Unbind scheduler */
1369 old_sched = svc->scheduler;
1370 ip_vs_unbind_scheduler(svc);
1371 if (old_sched)
1372 ip_vs_scheduler_put(old_sched);
1373
1374 /* Unbind app inc */
1375 if (svc->inc) {
1376 ip_vs_app_inc_put(svc->inc);
1377 svc->inc = NULL;
1378 }
1379
1380 /*
1381 * Unlink the whole destination list
1382 */
1383 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1384 __ip_vs_unlink_dest(svc, dest, 0);
1385 __ip_vs_del_dest(dest);
1386 }
1387
1388 /*
1389 * Update the virtual service counters
1390 */
1391 if (svc->port == FTPPORT)
1392 atomic_dec(&ip_vs_ftpsvc_counter);
1393 else if (svc->port == 0)
1394 atomic_dec(&ip_vs_nullsvc_counter);
1395
1396 /*
1397 * Free the service if nobody refers to it
1398 */
1399 if (atomic_read(&svc->refcnt) == 0)
1400 kfree(svc);
1401
1402 /* decrease the module use count */
1403 ip_vs_use_count_dec();
1404}
1405
1406/*
1407 * Delete a service from the service list
1408 */
1409static int ip_vs_del_service(struct ip_vs_service *svc)
1410{
1411 if (svc == NULL)
1412 return -EEXIST;
1413
1414 /*
1415 * Unhash it from the service table
1416 */
1417 write_lock_bh(&__ip_vs_svc_lock);
1418
1419 ip_vs_svc_unhash(svc);
1420
1421 /*
1422 * Wait until all the svc users go away.
1423 */
1424 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1425
1426 __ip_vs_del_service(svc);
1427
1428 write_unlock_bh(&__ip_vs_svc_lock);
1429
1430 return 0;
1431}
1432
1433
1434/*
1435 * Flush all the virtual services
1436 */
1437static int ip_vs_flush(void)
1438{
1439 int idx;
1440 struct ip_vs_service *svc, *nxt;
1441
1442 /*
1443 * Flush the service table hashed by <protocol,addr,port>
1444 */
1445 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1446 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1447 write_lock_bh(&__ip_vs_svc_lock);
1448 ip_vs_svc_unhash(svc);
1449 /*
1450 * Wait until all the svc users go away.
1451 */
1452 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1453 __ip_vs_del_service(svc);
1454 write_unlock_bh(&__ip_vs_svc_lock);
1455 }
1456 }
1457
1458 /*
1459 * Flush the service table hashed by fwmark
1460 */
1461 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1462 list_for_each_entry_safe(svc, nxt,
1463 &ip_vs_svc_fwm_table[idx], f_list) {
1464 write_lock_bh(&__ip_vs_svc_lock);
1465 ip_vs_svc_unhash(svc);
1466 /*
1467 * Wait until all the svc users go away.
1468 */
1469 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1470 __ip_vs_del_service(svc);
1471 write_unlock_bh(&__ip_vs_svc_lock);
1472 }
1473 }
1474
1475 return 0;
1476}
1477
1478
1479/*
1480 * Zero counters in a service or all services
1481 */
1482static int ip_vs_zero_service(struct ip_vs_service *svc)
1483{
1484 struct ip_vs_dest *dest;
1485
1486 write_lock_bh(&__ip_vs_svc_lock);
1487 list_for_each_entry(dest, &svc->destinations, n_list) {
1488 ip_vs_zero_stats(&dest->stats);
1489 }
1490 ip_vs_zero_stats(&svc->stats);
1491 write_unlock_bh(&__ip_vs_svc_lock);
1492 return 0;
1493}
1494
1495static int ip_vs_zero_all(void)
1496{
1497 int idx;
1498 struct ip_vs_service *svc;
1499
1500 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1501 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1502 ip_vs_zero_service(svc);
1503 }
1504 }
1505
1506 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1507 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1508 ip_vs_zero_service(svc);
1509 }
1510 }
1511
1512 ip_vs_zero_stats(&ip_vs_stats);
1513 return 0;
1514}
1515
1516
1517static int
1518proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
1519 void __user *buffer, size_t *lenp, loff_t *ppos)
1520{
1521 int *valp = table->data;
1522 int val = *valp;
1523 int rc;
1524
1525 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1526 if (write && (*valp != val)) {
1527 if ((*valp < 0) || (*valp > 3)) {
1528 /* Restore the correct value */
1529 *valp = val;
1530 } else {
1da177e4 1531 update_defense_level();
1da177e4
LT
1532 }
1533 }
1534 return rc;
1535}
1536
1537
1538static int
1539proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
1540 void __user *buffer, size_t *lenp, loff_t *ppos)
1541{
1542 int *valp = table->data;
1543 int val[2];
1544 int rc;
1545
1546 /* backup the value first */
1547 memcpy(val, valp, sizeof(val));
1548
1549 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1550 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1551 /* Restore the correct value */
1552 memcpy(valp, val, sizeof(val));
1553 }
1554 return rc;
1555}
1556
1557
1558/*
1559 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1560 */
1561
1562static struct ctl_table vs_vars[] = {
1563 {
1da177e4
LT
1564 .procname = "amemthresh",
1565 .data = &sysctl_ip_vs_amemthresh,
1566 .maxlen = sizeof(int),
1567 .mode = 0644,
1568 .proc_handler = &proc_dointvec,
1569 },
1570#ifdef CONFIG_IP_VS_DEBUG
1571 {
1da177e4
LT
1572 .procname = "debug_level",
1573 .data = &sysctl_ip_vs_debug_level,
1574 .maxlen = sizeof(int),
1575 .mode = 0644,
1576 .proc_handler = &proc_dointvec,
1577 },
1578#endif
1579 {
1da177e4
LT
1580 .procname = "am_droprate",
1581 .data = &sysctl_ip_vs_am_droprate,
1582 .maxlen = sizeof(int),
1583 .mode = 0644,
1584 .proc_handler = &proc_dointvec,
1585 },
1586 {
1da177e4
LT
1587 .procname = "drop_entry",
1588 .data = &sysctl_ip_vs_drop_entry,
1589 .maxlen = sizeof(int),
1590 .mode = 0644,
1591 .proc_handler = &proc_do_defense_mode,
1592 },
1593 {
1da177e4
LT
1594 .procname = "drop_packet",
1595 .data = &sysctl_ip_vs_drop_packet,
1596 .maxlen = sizeof(int),
1597 .mode = 0644,
1598 .proc_handler = &proc_do_defense_mode,
1599 },
1600 {
1da177e4
LT
1601 .procname = "secure_tcp",
1602 .data = &sysctl_ip_vs_secure_tcp,
1603 .maxlen = sizeof(int),
1604 .mode = 0644,
1605 .proc_handler = &proc_do_defense_mode,
1606 },
1607#if 0
1608 {
1da177e4
LT
1609 .procname = "timeout_established",
1610 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1611 .maxlen = sizeof(int),
1612 .mode = 0644,
1613 .proc_handler = &proc_dointvec_jiffies,
1614 },
1615 {
1da177e4
LT
1616 .procname = "timeout_synsent",
1617 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1618 .maxlen = sizeof(int),
1619 .mode = 0644,
1620 .proc_handler = &proc_dointvec_jiffies,
1621 },
1622 {
1da177e4
LT
1623 .procname = "timeout_synrecv",
1624 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1625 .maxlen = sizeof(int),
1626 .mode = 0644,
1627 .proc_handler = &proc_dointvec_jiffies,
1628 },
1629 {
1da177e4
LT
1630 .procname = "timeout_finwait",
1631 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1632 .maxlen = sizeof(int),
1633 .mode = 0644,
1634 .proc_handler = &proc_dointvec_jiffies,
1635 },
1636 {
1da177e4
LT
1637 .procname = "timeout_timewait",
1638 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1639 .maxlen = sizeof(int),
1640 .mode = 0644,
1641 .proc_handler = &proc_dointvec_jiffies,
1642 },
1643 {
1da177e4
LT
1644 .procname = "timeout_close",
1645 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1646 .maxlen = sizeof(int),
1647 .mode = 0644,
1648 .proc_handler = &proc_dointvec_jiffies,
1649 },
1650 {
1da177e4
LT
1651 .procname = "timeout_closewait",
1652 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1653 .maxlen = sizeof(int),
1654 .mode = 0644,
1655 .proc_handler = &proc_dointvec_jiffies,
1656 },
1657 {
1da177e4
LT
1658 .procname = "timeout_lastack",
1659 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1660 .maxlen = sizeof(int),
1661 .mode = 0644,
1662 .proc_handler = &proc_dointvec_jiffies,
1663 },
1664 {
1da177e4
LT
1665 .procname = "timeout_listen",
1666 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1667 .maxlen = sizeof(int),
1668 .mode = 0644,
1669 .proc_handler = &proc_dointvec_jiffies,
1670 },
1671 {
1da177e4
LT
1672 .procname = "timeout_synack",
1673 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1674 .maxlen = sizeof(int),
1675 .mode = 0644,
1676 .proc_handler = &proc_dointvec_jiffies,
1677 },
1678 {
1da177e4
LT
1679 .procname = "timeout_udp",
1680 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1681 .maxlen = sizeof(int),
1682 .mode = 0644,
1683 .proc_handler = &proc_dointvec_jiffies,
1684 },
1685 {
1da177e4
LT
1686 .procname = "timeout_icmp",
1687 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1688 .maxlen = sizeof(int),
1689 .mode = 0644,
1690 .proc_handler = &proc_dointvec_jiffies,
1691 },
1692#endif
1693 {
1da177e4
LT
1694 .procname = "cache_bypass",
1695 .data = &sysctl_ip_vs_cache_bypass,
1696 .maxlen = sizeof(int),
1697 .mode = 0644,
1698 .proc_handler = &proc_dointvec,
1699 },
1700 {
1da177e4
LT
1701 .procname = "expire_nodest_conn",
1702 .data = &sysctl_ip_vs_expire_nodest_conn,
1703 .maxlen = sizeof(int),
1704 .mode = 0644,
1705 .proc_handler = &proc_dointvec,
1706 },
1707 {
1da177e4
LT
1708 .procname = "expire_quiescent_template",
1709 .data = &sysctl_ip_vs_expire_quiescent_template,
1710 .maxlen = sizeof(int),
1711 .mode = 0644,
1712 .proc_handler = &proc_dointvec,
1713 },
1714 {
1da177e4
LT
1715 .procname = "sync_threshold",
1716 .data = &sysctl_ip_vs_sync_threshold,
1717 .maxlen = sizeof(sysctl_ip_vs_sync_threshold),
1718 .mode = 0644,
1719 .proc_handler = &proc_do_sync_threshold,
1720 },
1721 {
1da177e4
LT
1722 .procname = "nat_icmp_send",
1723 .data = &sysctl_ip_vs_nat_icmp_send,
1724 .maxlen = sizeof(int),
1725 .mode = 0644,
1726 .proc_handler = &proc_dointvec,
1727 },
1728 { .ctl_name = 0 }
1729};
1730
5587da55 1731const struct ctl_path net_vs_ctl_path[] = {
90754f8e
PE
1732 { .procname = "net", .ctl_name = CTL_NET, },
1733 { .procname = "ipv4", .ctl_name = NET_IPV4, },
1734 { .procname = "vs", },
1735 { }
1da177e4 1736};
90754f8e 1737EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1da177e4
LT
1738
1739static struct ctl_table_header * sysctl_header;
1740
1741#ifdef CONFIG_PROC_FS
1742
/* Cursor kept in seq->private while walking the service hash tables. */
struct ip_vs_iter {
	struct list_head *table;	/* ip_vs_svc_table or ip_vs_svc_fwm_table */
	int bucket;			/* index of the bucket within 'table' */
};
1747
1748/*
1749 * Write the contents of the VS rule table to a PROCfs file.
1750 * (It is kept just for backward compatibility)
1751 */
1752static inline const char *ip_vs_fwd_name(unsigned flags)
1753{
1754 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1755 case IP_VS_CONN_F_LOCALNODE:
1756 return "Local";
1757 case IP_VS_CONN_F_TUNNEL:
1758 return "Tunnel";
1759 case IP_VS_CONN_F_DROUTE:
1760 return "Route";
1761 default:
1762 return "Masq";
1763 }
1764}
1765
1766
1767/* Get the Nth entry in the two lists */
1768static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1769{
1770 struct ip_vs_iter *iter = seq->private;
1771 int idx;
1772 struct ip_vs_service *svc;
1773
1774 /* look in hash by protocol */
1775 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1776 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1777 if (pos-- == 0){
1778 iter->table = ip_vs_svc_table;
1779 iter->bucket = idx;
1780 return svc;
1781 }
1782 }
1783 }
1784
1785 /* keep looking in fwmark */
1786 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1787 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1788 if (pos-- == 0) {
1789 iter->table = ip_vs_svc_fwm_table;
1790 iter->bucket = idx;
1791 return svc;
1792 }
1793 }
1794 }
1795
1796 return NULL;
1797}
1798
1799static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1800{
1801
1802 read_lock_bh(&__ip_vs_svc_lock);
1803 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1804}
1805
1806
1807static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1808{
1809 struct list_head *e;
1810 struct ip_vs_iter *iter;
1811 struct ip_vs_service *svc;
1812
1813 ++*pos;
1814 if (v == SEQ_START_TOKEN)
1815 return ip_vs_info_array(seq,0);
1816
1817 svc = v;
1818 iter = seq->private;
1819
1820 if (iter->table == ip_vs_svc_table) {
1821 /* next service in table hashed by protocol */
1822 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1823 return list_entry(e, struct ip_vs_service, s_list);
1824
1825
1826 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1827 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1828 s_list) {
1829 return svc;
1830 }
1831 }
1832
1833 iter->table = ip_vs_svc_fwm_table;
1834 iter->bucket = -1;
1835 goto scan_fwmark;
1836 }
1837
1838 /* next service in hashed by fwmark */
1839 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1840 return list_entry(e, struct ip_vs_service, f_list);
1841
1842 scan_fwmark:
1843 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1844 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1845 f_list)
1846 return svc;
1847 }
1848
1849 return NULL;
1850}
1851
1852static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1853{
1854 read_unlock_bh(&__ip_vs_svc_lock);
1855}
1856
1857
1858static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1859{
1860 if (v == SEQ_START_TOKEN) {
1861 seq_printf(seq,
1862 "IP Virtual Server version %d.%d.%d (size=%d)\n",
1863 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
1864 seq_puts(seq,
1865 "Prot LocalAddress:Port Scheduler Flags\n");
1866 seq_puts(seq,
1867 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1868 } else {
1869 const struct ip_vs_service *svc = v;
1870 const struct ip_vs_iter *iter = seq->private;
1871 const struct ip_vs_dest *dest;
1872
667a5f18
VB
1873 if (iter->table == ip_vs_svc_table) {
1874#ifdef CONFIG_IP_VS_IPV6
1875 if (svc->af == AF_INET6)
1876 seq_printf(seq, "%s [" NIP6_FMT "]:%04X %s ",
1877 ip_vs_proto_name(svc->protocol),
1878 NIP6(svc->addr.in6),
1879 ntohs(svc->port),
1880 svc->scheduler->name);
1881 else
1882#endif
1883 seq_printf(seq, "%s %08X:%04X %s ",
1884 ip_vs_proto_name(svc->protocol),
1885 ntohl(svc->addr.ip),
1886 ntohs(svc->port),
1887 svc->scheduler->name);
1888 } else {
1da177e4
LT
1889 seq_printf(seq, "FWM %08X %s ",
1890 svc->fwmark, svc->scheduler->name);
667a5f18 1891 }
1da177e4
LT
1892
1893 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1894 seq_printf(seq, "persistent %d %08X\n",
1895 svc->timeout,
1896 ntohl(svc->netmask));
1897 else
1898 seq_putc(seq, '\n');
1899
1900 list_for_each_entry(dest, &svc->destinations, n_list) {
667a5f18
VB
1901#ifdef CONFIG_IP_VS_IPV6
1902 if (dest->af == AF_INET6)
1903 seq_printf(seq,
1904 " -> [" NIP6_FMT "]:%04X"
1905 " %-7s %-6d %-10d %-10d\n",
1906 NIP6(dest->addr.in6),
1907 ntohs(dest->port),
1908 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1909 atomic_read(&dest->weight),
1910 atomic_read(&dest->activeconns),
1911 atomic_read(&dest->inactconns));
1912 else
1913#endif
1914 seq_printf(seq,
1915 " -> %08X:%04X "
1916 "%-7s %-6d %-10d %-10d\n",
1917 ntohl(dest->addr.ip),
1918 ntohs(dest->port),
1919 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1920 atomic_read(&dest->weight),
1921 atomic_read(&dest->activeconns),
1922 atomic_read(&dest->inactconns));
1923
1da177e4
LT
1924 }
1925 }
1926 return 0;
1927}
1928
56b3d975 1929static const struct seq_operations ip_vs_info_seq_ops = {
1da177e4
LT
1930 .start = ip_vs_info_seq_start,
1931 .next = ip_vs_info_seq_next,
1932 .stop = ip_vs_info_seq_stop,
1933 .show = ip_vs_info_seq_show,
1934};
1935
1936static int ip_vs_info_open(struct inode *inode, struct file *file)
1937{
cf7732e4
PE
1938 return seq_open_private(file, &ip_vs_info_seq_ops,
1939 sizeof(struct ip_vs_iter));
1da177e4
LT
1940}
1941
9a32144e 1942static const struct file_operations ip_vs_info_fops = {
1da177e4
LT
1943 .owner = THIS_MODULE,
1944 .open = ip_vs_info_open,
1945 .read = seq_read,
1946 .llseek = seq_lseek,
1947 .release = seq_release_private,
1948};
1949
1950#endif
1951
519e49e8
SW
1952struct ip_vs_stats ip_vs_stats = {
1953 .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
1954};
1da177e4
LT
1955
1956#ifdef CONFIG_PROC_FS
1957static int ip_vs_stats_show(struct seq_file *seq, void *v)
1958{
1959
1960/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1961 seq_puts(seq,
1962 " Total Incoming Outgoing Incoming Outgoing\n");
1963 seq_printf(seq,
1964 " Conns Packets Packets Bytes Bytes\n");
1965
1966 spin_lock_bh(&ip_vs_stats.lock);
1967 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.conns,
1968 ip_vs_stats.inpkts, ip_vs_stats.outpkts,
1969 (unsigned long long) ip_vs_stats.inbytes,
1970 (unsigned long long) ip_vs_stats.outbytes);
1971
1972/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1973 seq_puts(seq,
1974 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1975 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
1976 ip_vs_stats.cps,
1977 ip_vs_stats.inpps,
1978 ip_vs_stats.outpps,
1979 ip_vs_stats.inbps,
1980 ip_vs_stats.outbps);
1981 spin_unlock_bh(&ip_vs_stats.lock);
1982
1983 return 0;
1984}
1985
1986static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1987{
1988 return single_open(file, ip_vs_stats_show, NULL);
1989}
1990
9a32144e 1991static const struct file_operations ip_vs_stats_fops = {
1da177e4
LT
1992 .owner = THIS_MODULE,
1993 .open = ip_vs_stats_seq_open,
1994 .read = seq_read,
1995 .llseek = seq_lseek,
1996 .release = single_release,
1997};
1998
1999#endif
2000
2001/*
2002 * Set timeout values for tcp tcpfin udp in the timeout_table.
2003 */
2004static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
2005{
2006 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2007 u->tcp_timeout,
2008 u->tcp_fin_timeout,
2009 u->udp_timeout);
2010
2011#ifdef CONFIG_IP_VS_PROTO_TCP
2012 if (u->tcp_timeout) {
2013 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
2014 = u->tcp_timeout * HZ;
2015 }
2016
2017 if (u->tcp_fin_timeout) {
2018 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
2019 = u->tcp_fin_timeout * HZ;
2020 }
2021#endif
2022
2023#ifdef CONFIG_IP_VS_PROTO_UDP
2024 if (u->udp_timeout) {
2025 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
2026 = u->udp_timeout * HZ;
2027 }
2028#endif
2029 return 0;
2030}
2031
2032
2033#define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2034#define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user))
2035#define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \
2036 sizeof(struct ip_vs_dest_user))
2037#define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2038#define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user))
2039#define MAX_ARG_LEN SVCDEST_ARG_LEN
2040
9b5b5cff 2041static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
1da177e4
LT
2042 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
2043 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
2044 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
2045 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
2046 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
2047 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
2048 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
2049 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
2050 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
2051 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
2052 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
2053};
2054
c860c6b1
JV
2055static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2056 struct ip_vs_service_user *usvc_compat)
2057{
2058 usvc->af = AF_INET;
2059 usvc->protocol = usvc_compat->protocol;
2060 usvc->addr.ip = usvc_compat->addr;
2061 usvc->port = usvc_compat->port;
2062 usvc->fwmark = usvc_compat->fwmark;
2063
2064 /* Deep copy of sched_name is not needed here */
2065 usvc->sched_name = usvc_compat->sched_name;
2066
2067 usvc->flags = usvc_compat->flags;
2068 usvc->timeout = usvc_compat->timeout;
2069 usvc->netmask = usvc_compat->netmask;
2070}
2071
2072static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2073 struct ip_vs_dest_user *udest_compat)
2074{
2075 udest->addr.ip = udest_compat->addr;
2076 udest->port = udest_compat->port;
2077 udest->conn_flags = udest_compat->conn_flags;
2078 udest->weight = udest_compat->weight;
2079 udest->u_threshold = udest_compat->u_threshold;
2080 udest->l_threshold = udest_compat->l_threshold;
2081}
2082
1da177e4
LT
2083static int
2084do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2085{
2086 int ret;
2087 unsigned char arg[MAX_ARG_LEN];
c860c6b1
JV
2088 struct ip_vs_service_user *usvc_compat;
2089 struct ip_vs_service_user_kern usvc;
1da177e4 2090 struct ip_vs_service *svc;
c860c6b1
JV
2091 struct ip_vs_dest_user *udest_compat;
2092 struct ip_vs_dest_user_kern udest;
1da177e4
LT
2093
2094 if (!capable(CAP_NET_ADMIN))
2095 return -EPERM;
2096
2097 if (len != set_arglen[SET_CMDID(cmd)]) {
2098 IP_VS_ERR("set_ctl: len %u != %u\n",
2099 len, set_arglen[SET_CMDID(cmd)]);
2100 return -EINVAL;
2101 }
2102
2103 if (copy_from_user(arg, user, len) != 0)
2104 return -EFAULT;
2105
2106 /* increase the module use count */
2107 ip_vs_use_count_inc();
2108
14cc3e2b 2109 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
1da177e4
LT
2110 ret = -ERESTARTSYS;
2111 goto out_dec;
2112 }
2113
2114 if (cmd == IP_VS_SO_SET_FLUSH) {
2115 /* Flush the virtual service */
2116 ret = ip_vs_flush();
2117 goto out_unlock;
2118 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2119 /* Set timeout values for (tcp tcpfin udp) */
2120 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
2121 goto out_unlock;
2122 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2123 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2124 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
2125 goto out_unlock;
2126 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2127 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2128 ret = stop_sync_thread(dm->state);
2129 goto out_unlock;
2130 }
2131
c860c6b1
JV
2132 usvc_compat = (struct ip_vs_service_user *)arg;
2133 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2134
2135 /* We only use the new structs internally, so copy userspace compat
2136 * structs to extended internal versions */
2137 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2138 ip_vs_copy_udest_compat(&udest, udest_compat);
1da177e4
LT
2139
2140 if (cmd == IP_VS_SO_SET_ZERO) {
2141 /* if no service address is set, zero counters in all */
c860c6b1 2142 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
1da177e4
LT
2143 ret = ip_vs_zero_all();
2144 goto out_unlock;
2145 }
2146 }
2147
2148 /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
c860c6b1 2149 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) {
1da177e4 2150 IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
c860c6b1
JV
2151 usvc.protocol, NIPQUAD(usvc.addr.ip),
2152 ntohs(usvc.port), usvc.sched_name);
1da177e4
LT
2153 ret = -EFAULT;
2154 goto out_unlock;
2155 }
2156
2157 /* Lookup the exact service by <protocol, addr, port> or fwmark */
c860c6b1 2158 if (usvc.fwmark == 0)
b18610de
JV
2159 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
2160 &usvc.addr, usvc.port);
1da177e4 2161 else
b18610de 2162 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
1da177e4
LT
2163
2164 if (cmd != IP_VS_SO_SET_ADD
c860c6b1 2165 && (svc == NULL || svc->protocol != usvc.protocol)) {
1da177e4
LT
2166 ret = -ESRCH;
2167 goto out_unlock;
2168 }
2169
2170 switch (cmd) {
2171 case IP_VS_SO_SET_ADD:
2172 if (svc != NULL)
2173 ret = -EEXIST;
2174 else
c860c6b1 2175 ret = ip_vs_add_service(&usvc, &svc);
1da177e4
LT
2176 break;
2177 case IP_VS_SO_SET_EDIT:
c860c6b1 2178 ret = ip_vs_edit_service(svc, &usvc);
1da177e4
LT
2179 break;
2180 case IP_VS_SO_SET_DEL:
2181 ret = ip_vs_del_service(svc);
2182 if (!ret)
2183 goto out_unlock;
2184 break;
2185 case IP_VS_SO_SET_ZERO:
2186 ret = ip_vs_zero_service(svc);
2187 break;
2188 case IP_VS_SO_SET_ADDDEST:
c860c6b1 2189 ret = ip_vs_add_dest(svc, &udest);
1da177e4
LT
2190 break;
2191 case IP_VS_SO_SET_EDITDEST:
c860c6b1 2192 ret = ip_vs_edit_dest(svc, &udest);
1da177e4
LT
2193 break;
2194 case IP_VS_SO_SET_DELDEST:
c860c6b1 2195 ret = ip_vs_del_dest(svc, &udest);
1da177e4
LT
2196 break;
2197 default:
2198 ret = -EINVAL;
2199 }
2200
2201 if (svc)
2202 ip_vs_service_put(svc);
2203
2204 out_unlock:
14cc3e2b 2205 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2206 out_dec:
2207 /* decrease the module use count */
2208 ip_vs_use_count_dec();
2209
2210 return ret;
2211}
2212
2213
2214static void
2215ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2216{
2217 spin_lock_bh(&src->lock);
2218 memcpy(dst, src, (char*)&src->lock - (char*)src);
2219 spin_unlock_bh(&src->lock);
2220}
2221
2222static void
2223ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2224{
2225 dst->protocol = src->protocol;
e7ade46a 2226 dst->addr = src->addr.ip;
1da177e4
LT
2227 dst->port = src->port;
2228 dst->fwmark = src->fwmark;
4da62fc7 2229 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
1da177e4
LT
2230 dst->flags = src->flags;
2231 dst->timeout = src->timeout / HZ;
2232 dst->netmask = src->netmask;
2233 dst->num_dests = src->num_dests;
2234 ip_vs_copy_stats(&dst->stats, &src->stats);
2235}
2236
2237static inline int
2238__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2239 struct ip_vs_get_services __user *uptr)
2240{
2241 int idx, count=0;
2242 struct ip_vs_service *svc;
2243 struct ip_vs_service_entry entry;
2244 int ret = 0;
2245
2246 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2247 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
f94fd041
JV
2248 /* Only expose IPv4 entries to old interface */
2249 if (svc->af != AF_INET)
2250 continue;
2251
1da177e4
LT
2252 if (count >= get->num_services)
2253 goto out;
4da62fc7 2254 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2255 ip_vs_copy_service(&entry, svc);
2256 if (copy_to_user(&uptr->entrytable[count],
2257 &entry, sizeof(entry))) {
2258 ret = -EFAULT;
2259 goto out;
2260 }
2261 count++;
2262 }
2263 }
2264
2265 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2266 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
f94fd041
JV
2267 /* Only expose IPv4 entries to old interface */
2268 if (svc->af != AF_INET)
2269 continue;
2270
1da177e4
LT
2271 if (count >= get->num_services)
2272 goto out;
4da62fc7 2273 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2274 ip_vs_copy_service(&entry, svc);
2275 if (copy_to_user(&uptr->entrytable[count],
2276 &entry, sizeof(entry))) {
2277 ret = -EFAULT;
2278 goto out;
2279 }
2280 count++;
2281 }
2282 }
2283 out:
2284 return ret;
2285}
2286
2287static inline int
2288__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2289 struct ip_vs_get_dests __user *uptr)
2290{
2291 struct ip_vs_service *svc;
b18610de 2292 union nf_inet_addr addr = { .ip = get->addr };
1da177e4
LT
2293 int ret = 0;
2294
2295 if (get->fwmark)
b18610de 2296 svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark);
1da177e4 2297 else
b18610de
JV
2298 svc = __ip_vs_service_get(AF_INET, get->protocol, &addr,
2299 get->port);
2300
1da177e4
LT
2301 if (svc) {
2302 int count = 0;
2303 struct ip_vs_dest *dest;
2304 struct ip_vs_dest_entry entry;
2305
2306 list_for_each_entry(dest, &svc->destinations, n_list) {
2307 if (count >= get->num_dests)
2308 break;
2309
e7ade46a 2310 entry.addr = dest->addr.ip;
1da177e4
LT
2311 entry.port = dest->port;
2312 entry.conn_flags = atomic_read(&dest->conn_flags);
2313 entry.weight = atomic_read(&dest->weight);
2314 entry.u_threshold = dest->u_threshold;
2315 entry.l_threshold = dest->l_threshold;
2316 entry.activeconns = atomic_read(&dest->activeconns);
2317 entry.inactconns = atomic_read(&dest->inactconns);
2318 entry.persistconns = atomic_read(&dest->persistconns);
2319 ip_vs_copy_stats(&entry.stats, &dest->stats);
2320 if (copy_to_user(&uptr->entrytable[count],
2321 &entry, sizeof(entry))) {
2322 ret = -EFAULT;
2323 break;
2324 }
2325 count++;
2326 }
2327 ip_vs_service_put(svc);
2328 } else
2329 ret = -ESRCH;
2330 return ret;
2331}
2332
2333static inline void
2334__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2335{
2336#ifdef CONFIG_IP_VS_PROTO_TCP
2337 u->tcp_timeout =
2338 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2339 u->tcp_fin_timeout =
2340 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2341#endif
2342#ifdef CONFIG_IP_VS_PROTO_UDP
2343 u->udp_timeout =
2344 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2345#endif
2346}
2347
2348
/* Index of a getsockopt command relative to the first IPVS option. */
#define GET_CMDID(cmd)		(cmd - IP_VS_BASE_CTL)
/* Size of the fixed request header for each getsockopt command. */
#define GET_INFO_ARG_LEN	(sizeof(struct ip_vs_getinfo))
#define GET_SERVICES_ARG_LEN	(sizeof(struct ip_vs_get_services))
#define GET_SERVICE_ARG_LEN	(sizeof(struct ip_vs_service_entry))
#define GET_DESTS_ARG_LEN	(sizeof(struct ip_vs_get_dests))
#define GET_TIMEOUT_ARG_LEN	(sizeof(struct ip_vs_timeout_user))
/* The daemon query returns two entries. */
#define GET_DAEMON_ARG_LEN	(sizeof(struct ip_vs_daemon_user) * 2)

/*
 * Per-command argument length, indexed by GET_CMDID(cmd).
 * Entries are stored as unsigned char, so every length must fit in
 * 0..255.  Commands without an initializer get a length of 0.
 */
static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
	[GET_CMDID(IP_VS_SO_GET_VERSION)]	= 64,
	[GET_CMDID(IP_VS_SO_GET_INFO)]		= GET_INFO_ARG_LEN,
	[GET_CMDID(IP_VS_SO_GET_SERVICES)]	= GET_SERVICES_ARG_LEN,
	[GET_CMDID(IP_VS_SO_GET_SERVICE)]	= GET_SERVICE_ARG_LEN,
	[GET_CMDID(IP_VS_SO_GET_DESTS)]		= GET_DESTS_ARG_LEN,
	[GET_CMDID(IP_VS_SO_GET_TIMEOUT)]	= GET_TIMEOUT_ARG_LEN,
	[GET_CMDID(IP_VS_SO_GET_DAEMON)]	= GET_DAEMON_ARG_LEN,
};
2366
/*
 * getsockopt() handler for the legacy IPVS control interface.
 *
 * @sk:   socket the option was issued on (not used in this function).
 * @cmd:  one of the IP_VS_SO_GET_* commands.
 * @user: user buffer; holds the request header on entry and receives
 *        the reply.
 * @len:  in: size of the user buffer.  Only the VERSION command updates
 *        it (to the length of the returned string, including the NUL).
 *
 * Returns 0 on success or a negative errno: -EPERM without CAP_NET_ADMIN,
 * -EINVAL for a buffer of the wrong size or an unknown command, -EFAULT
 * on a failed user copy, -ERESTARTSYS if interrupted while waiting for
 * the mutex, -ESRCH if the requested service does not exist.
 */
static int
do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
	/* Kernel copy of the fixed-size request header. */
	unsigned char arg[128];
	int ret = 0;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	/* The buffer must at least hold the command's fixed header. */
	if (*len < get_arglen[GET_CMDID(cmd)]) {
		IP_VS_ERR("get_ctl: len %u < %u\n",
			  *len, get_arglen[GET_CMDID(cmd)]);
		return -EINVAL;
	}

	if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0)
		return -EFAULT;

	if (mutex_lock_interruptible(&__ip_vs_mutex))
		return -ERESTARTSYS;

	/* From here on every exit must go through "out" to drop the mutex. */
	switch (cmd) {
	case IP_VS_SO_GET_VERSION:
	{
		char buf[64];

		sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
			NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
		if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
			ret = -EFAULT;
			goto out;
		}
		*len = strlen(buf)+1;
	}
	break;

	case IP_VS_SO_GET_INFO:
	{
		struct ip_vs_getinfo info;
		info.version = IP_VS_VERSION_CODE;
		info.size = IP_VS_CONN_TAB_SIZE;
		info.num_services = ip_vs_num_services;
		if (copy_to_user(user, &info, sizeof(info)) != 0)
			ret = -EFAULT;
	}
	break;

	case IP_VS_SO_GET_SERVICES:
	{
		struct ip_vs_get_services *get;
		int size;

		get = (struct ip_vs_get_services *)arg;
		/*
		 * The caller must size the buffer exactly for the number of
		 * entries it asked for.
		 * NOTE(review): size is an int derived from a user-supplied
		 * count; confirm the multiplication cannot wrap.
		 */
		size = sizeof(*get) +
			sizeof(struct ip_vs_service_entry) * get->num_services;
		if (*len != size) {
			IP_VS_ERR("length: %u != %u\n", *len, size);
			ret = -EINVAL;
			goto out;
		}
		ret = __ip_vs_get_service_entries(get, user);
	}
	break;

	case IP_VS_SO_GET_SERVICE:
	{
		struct ip_vs_service_entry *entry;
		struct ip_vs_service *svc;
		union nf_inet_addr addr;

		/* The request doubles as the reply buffer. */
		entry = (struct ip_vs_service_entry *)arg;
		addr.ip = entry->addr;
		if (entry->fwmark)
			svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark);
		else
			svc = __ip_vs_service_get(AF_INET, entry->protocol,
						  &addr, entry->port);
		if (svc) {
			ip_vs_copy_service(entry, svc);
			if (copy_to_user(user, entry, sizeof(*entry)) != 0)
				ret = -EFAULT;
			ip_vs_service_put(svc);
		} else
			ret = -ESRCH;
	}
	break;

	case IP_VS_SO_GET_DESTS:
	{
		struct ip_vs_get_dests *get;
		int size;

		get = (struct ip_vs_get_dests *)arg;
		/* Same exact-size rule as for IP_VS_SO_GET_SERVICES. */
		size = sizeof(*get) +
			sizeof(struct ip_vs_dest_entry) * get->num_dests;
		if (*len != size) {
			IP_VS_ERR("length: %u != %u\n", *len, size);
			ret = -EINVAL;
			goto out;
		}
		ret = __ip_vs_get_dest_entries(get, user);
	}
	break;

	case IP_VS_SO_GET_TIMEOUT:
	{
		struct ip_vs_timeout_user t;

		__ip_vs_get_timeouts(&t);
		if (copy_to_user(user, &t, sizeof(t)) != 0)
			ret = -EFAULT;
	}
	break;

	case IP_VS_SO_GET_DAEMON:
	{
		/* d[0] describes the master daemon, d[1] the backup. */
		struct ip_vs_daemon_user d[2];

		memset(&d, 0, sizeof(d));
		if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
			d[0].state = IP_VS_STATE_MASTER;
			strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
			d[0].syncid = ip_vs_master_syncid;
		}
		if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
			d[1].state = IP_VS_STATE_BACKUP;
			strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
			d[1].syncid = ip_vs_backup_syncid;
		}
		if (copy_to_user(user, &d, sizeof(d)) != 0)
			ret = -EFAULT;
	}
	break;

	default:
		ret = -EINVAL;
	}

  out:
	mutex_unlock(&__ip_vs_mutex);
	return ret;
}
2509
2510
/*
 * Netfilter sockopt registration for the legacy IPVS control interface:
 * set commands are handled by do_ip_vs_set_ctl(), get commands by
 * do_ip_vs_get_ctl().  The optmax values are one past the last command.
 */
static struct nf_sockopt_ops ip_vs_sockopts = {
	.pf		= PF_INET,
	.set_optmin	= IP_VS_BASE_CTL,
	.set_optmax	= IP_VS_SO_SET_MAX+1,
	.set		= do_ip_vs_set_ctl,
	.get_optmin	= IP_VS_BASE_CTL,
	.get_optmax	= IP_VS_SO_GET_MAX+1,
	.get		= do_ip_vs_get_ctl,
	.owner		= THIS_MODULE,
};
2521
9a812198
JV
2522/*
2523 * Generic Netlink interface
2524 */
2525
2526/* IPVS genetlink family */
2527static struct genl_family ip_vs_genl_family = {
2528 .id = GENL_ID_GENERATE,
2529 .hdrsize = 0,
2530 .name = IPVS_GENL_NAME,
2531 .version = IPVS_GENL_VERSION,
2532 .maxattr = IPVS_CMD_MAX,
2533};
2534
/*
 * Policy used for first-level command attributes.
 * SERVICE, DEST and DAEMON are nested containers parsed with the
 * policies below; the three timeout attributes are plain 32-bit values.
 */
static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
	[IPVS_CMD_ATTR_SERVICE]		= { .type = NLA_NESTED },
	[IPVS_CMD_ATTR_DEST]		= { .type = NLA_NESTED },
	[IPVS_CMD_ATTR_DAEMON]		= { .type = NLA_NESTED },
	[IPVS_CMD_ATTR_TIMEOUT_TCP]	= { .type = NLA_U32 },
	[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]	= { .type = NLA_U32 },
	[IPVS_CMD_ATTR_TIMEOUT_UDP]	= { .type = NLA_U32 },
};
2544
/*
 * Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON.
 * The interface name is a NUL-terminated string bounded by
 * IP_VS_IFNAME_MAXLEN.
 */
static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
	[IPVS_DAEMON_ATTR_STATE]	= { .type = NLA_U32 },
	[IPVS_DAEMON_ATTR_MCAST_IFN]	= { .type = NLA_NUL_STRING,
					    .len = IP_VS_IFNAME_MAXLEN },
	[IPVS_DAEMON_ATTR_SYNC_ID]	= { .type = NLA_U32 },
};
2552
/*
 * Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE.
 * ADDR and FLAGS are binary blobs bounded by the size of the
 * corresponding kernel structure.
 */
static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
	[IPVS_SVC_ATTR_AF]		= { .type = NLA_U16 },
	[IPVS_SVC_ATTR_PROTOCOL]	= { .type = NLA_U16 },
	[IPVS_SVC_ATTR_ADDR]		= { .type = NLA_BINARY,
					    .len = sizeof(union nf_inet_addr) },
	[IPVS_SVC_ATTR_PORT]		= { .type = NLA_U16 },
	[IPVS_SVC_ATTR_FWMARK]		= { .type = NLA_U32 },
	[IPVS_SVC_ATTR_SCHED_NAME]	= { .type = NLA_NUL_STRING,
					    .len = IP_VS_SCHEDNAME_MAXLEN },
	[IPVS_SVC_ATTR_FLAGS]		= { .type = NLA_BINARY,
					    .len = sizeof(struct ip_vs_flags) },
	[IPVS_SVC_ATTR_TIMEOUT]		= { .type = NLA_U32 },
	[IPVS_SVC_ATTR_NETMASK]		= { .type = NLA_U32 },
	[IPVS_SVC_ATTR_STATS]		= { .type = NLA_NESTED },
};
2569
/*
 * Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST.
 * ADDR is a binary blob bounded by sizeof(union nf_inet_addr).
 */
static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
	[IPVS_DEST_ATTR_ADDR]		= { .type = NLA_BINARY,
					    .len = sizeof(union nf_inet_addr) },
	[IPVS_DEST_ATTR_PORT]		= { .type = NLA_U16 },
	[IPVS_DEST_ATTR_FWD_METHOD]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_WEIGHT]		= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_U_THRESH]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_L_THRESH]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_ACTIVE_CONNS]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_INACT_CONNS]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_PERSIST_CONNS]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_STATS]		= { .type = NLA_NESTED },
};
2584
/*
 * Append a nested statistics attribute to @skb.
 *
 * @skb:            message being built.
 * @container_type: attribute type used for the enclosing nest.
 * @stats:          counters to export; read under stats->lock.
 *
 * Returns 0 on success or -EMSGSIZE if the message has no room, in which
 * case the partially written nest is cancelled.
 */
static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
				 struct ip_vs_stats *stats)
{
	struct nlattr *nl_stats = nla_nest_start(skb, container_type);
	if (!nl_stats)
		return -EMSGSIZE;

	spin_lock_bh(&stats->lock);

	/* The NLA_PUT_* macros jump to nla_put_failure when skb is full. */
	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->conns);
	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->inpkts);
	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->outpkts);
	NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->inbytes);
	NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->outbytes);
	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->cps);
	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->inpps);
	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->outpps);
	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->inbps);
	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->outbps);

	spin_unlock_bh(&stats->lock);

	nla_nest_end(skb, nl_stats);

	return 0;

nla_put_failure:
	/* Reached with the lock still held, so release it before bailing. */
	spin_unlock_bh(&stats->lock);
	nla_nest_cancel(skb, nl_stats);
	return -EMSGSIZE;
}
2616
/*
 * Append a nested IPVS_CMD_ATTR_SERVICE attribute describing @svc to
 * @skb.
 *
 * A fwmark service is identified by its fwmark only; any other service
 * by protocol, address and port.  Scheduler name, flags, timeout (in
 * seconds), netmask and statistics follow in both cases.
 *
 * Returns 0 on success or -EMSGSIZE if the message has no room, in which
 * case the partially written nest is cancelled.
 */
static int ip_vs_genl_fill_service(struct sk_buff *skb,
				   struct ip_vs_service *svc)
{
	struct nlattr *nl_service;
	/* Report every flag bit as valid (mask of all ones). */
	struct ip_vs_flags flags = { .flags = svc->flags,
				     .mask = ~0 };

	nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
	if (!nl_service)
		return -EMSGSIZE;

	/* The NLA_PUT* macros jump to nla_put_failure when skb is full. */
	NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);

	if (svc->fwmark) {
		NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
	} else {
		NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
		NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
		NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
	}

	NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
	NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
	/* svc->timeout is kept in jiffies; export it in seconds. */
	NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
	NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);

	if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
		goto nla_put_failure;

	nla_nest_end(skb, nl_service);

	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nl_service);
	return -EMSGSIZE;
}
2654
2655static int ip_vs_genl_dump_service(struct sk_buff *skb,
2656 struct ip_vs_service *svc,
2657 struct netlink_callback *cb)
2658{
2659 void *hdr;
2660
2661 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2662 &ip_vs_genl_family, NLM_F_MULTI,
2663 IPVS_CMD_NEW_SERVICE);
2664 if (!hdr)
2665 return -EMSGSIZE;
2666
2667 if (ip_vs_genl_fill_service(skb, svc) < 0)
2668 goto nla_put_failure;
2669
2670 return genlmsg_end(skb, hdr);
2671
2672nla_put_failure:
2673 genlmsg_cancel(skb, hdr);
2674 return -EMSGSIZE;
2675}
2676
2677static int ip_vs_genl_dump_services(struct sk_buff *skb,
2678 struct netlink_callback *cb)
2679{
2680 int idx = 0, i;
2681 int start = cb->args[0];
2682 struct ip_vs_service *svc;
2683
2684 mutex_lock(&__ip_vs_mutex);
2685 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2686 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2687 if (++idx <= start)
2688 continue;
2689 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2690 idx--;
2691 goto nla_put_failure;
2692 }
2693 }
2694 }
2695
2696 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2697 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2698 if (++idx <= start)
2699 continue;
2700 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2701 idx--;
2702 goto nla_put_failure;
2703 }
2704 }
2705 }
2706
2707nla_put_failure:
2708 mutex_unlock(&__ip_vs_mutex);
2709 cb->args[0] = idx;
2710
2711 return skb->len;
2712}
2713
c860c6b1 2714static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
9a812198
JV
2715 struct nlattr *nla, int full_entry)
2716{
2717 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2718 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2719
2720 /* Parse mandatory identifying service fields first */
2721 if (nla == NULL ||
2722 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2723 return -EINVAL;
2724
2725 nla_af = attrs[IPVS_SVC_ATTR_AF];
2726 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2727 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2728 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2729 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2730
2731 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2732 return -EINVAL;
2733
c860c6b1 2734 usvc->af = nla_get_u16(nla_af);
f94fd041
JV
2735#ifdef CONFIG_IP_VS_IPV6
2736 if (usvc->af != AF_INET && usvc->af != AF_INET6)
2737#else
2738 if (usvc->af != AF_INET)
2739#endif
9a812198
JV
2740 return -EAFNOSUPPORT;
2741
2742 if (nla_fwmark) {
2743 usvc->protocol = IPPROTO_TCP;
2744 usvc->fwmark = nla_get_u32(nla_fwmark);
2745 } else {
2746 usvc->protocol = nla_get_u16(nla_protocol);
2747 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2748 usvc->port = nla_get_u16(nla_port);
2749 usvc->fwmark = 0;
2750 }
2751
2752 /* If a full entry was requested, check for the additional fields */
2753 if (full_entry) {
2754 struct nlattr *nla_sched, *nla_flags, *nla_timeout,
2755 *nla_netmask;
2756 struct ip_vs_flags flags;
2757 struct ip_vs_service *svc;
2758
2759 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2760 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2761 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2762 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2763
2764 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2765 return -EINVAL;
2766
2767 nla_memcpy(&flags, nla_flags, sizeof(flags));
2768
2769 /* prefill flags from service if it already exists */
2770 if (usvc->fwmark)
b18610de 2771 svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark);
9a812198 2772 else
b18610de
JV
2773 svc = __ip_vs_service_get(usvc->af, usvc->protocol,
2774 &usvc->addr, usvc->port);
9a812198
JV
2775 if (svc) {
2776 usvc->flags = svc->flags;
2777 ip_vs_service_put(svc);
2778 } else
2779 usvc->flags = 0;
2780
2781 /* set new flags from userland */
2782 usvc->flags = (usvc->flags & ~flags.mask) |
2783 (flags.flags & flags.mask);
c860c6b1 2784 usvc->sched_name = nla_data(nla_sched);
9a812198
JV
2785 usvc->timeout = nla_get_u32(nla_timeout);
2786 usvc->netmask = nla_get_u32(nla_netmask);
2787 }
2788
2789 return 0;
2790}
2791
2792static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2793{
c860c6b1 2794 struct ip_vs_service_user_kern usvc;
9a812198
JV
2795 int ret;
2796
2797 ret = ip_vs_genl_parse_service(&usvc, nla, 0);
2798 if (ret)
2799 return ERR_PTR(ret);
2800
2801 if (usvc.fwmark)
b18610de 2802 return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
9a812198 2803 else
b18610de
JV
2804 return __ip_vs_service_get(usvc.af, usvc.protocol,
2805 &usvc.addr, usvc.port);
9a812198
JV
2806}
2807
/*
 * Append a nested IPVS_CMD_ATTR_DEST attribute describing @dest to @skb:
 * address, port, forwarding method, weight, thresholds, connection
 * counters and statistics.
 *
 * Returns 0 on success or -EMSGSIZE if the message has no room, in which
 * case the partially written nest is cancelled.
 */
static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
{
	struct nlattr *nl_dest;

	nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
	if (!nl_dest)
		return -EMSGSIZE;

	/* The NLA_PUT* macros jump to nla_put_failure when skb is full. */
	NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
	NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);

	/* Only the forwarding-method bits of conn_flags are exported. */
	NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
		    atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
	NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
	NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
	NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
	NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
		    atomic_read(&dest->activeconns));
	NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
		    atomic_read(&dest->inactconns));
	NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
		    atomic_read(&dest->persistconns));

	if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
		goto nla_put_failure;

	nla_nest_end(skb, nl_dest);

	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nl_dest);
	return -EMSGSIZE;
}
2842
2843static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2844 struct netlink_callback *cb)
2845{
2846 void *hdr;
2847
2848 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2849 &ip_vs_genl_family, NLM_F_MULTI,
2850 IPVS_CMD_NEW_DEST);
2851 if (!hdr)
2852 return -EMSGSIZE;
2853
2854 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2855 goto nla_put_failure;
2856
2857 return genlmsg_end(skb, hdr);
2858
2859nla_put_failure:
2860 genlmsg_cancel(skb, hdr);
2861 return -EMSGSIZE;
2862}
2863
2864static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2865 struct netlink_callback *cb)
2866{
2867 int idx = 0;
2868 int start = cb->args[0];
2869 struct ip_vs_service *svc;
2870 struct ip_vs_dest *dest;
2871 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2872
2873 mutex_lock(&__ip_vs_mutex);
2874
2875 /* Try to find the service for which to dump destinations */
2876 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2877 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2878 goto out_err;
2879
2880 svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
2881 if (IS_ERR(svc) || svc == NULL)
2882 goto out_err;
2883
2884 /* Dump the destinations */
2885 list_for_each_entry(dest, &svc->destinations, n_list) {
2886 if (++idx <= start)
2887 continue;
2888 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2889 idx--;
2890 goto nla_put_failure;
2891 }
2892 }
2893
2894nla_put_failure:
2895 cb->args[0] = idx;
2896 ip_vs_service_put(svc);
2897
2898out_err:
2899 mutex_unlock(&__ip_vs_mutex);
2900
2901 return skb->len;
2902}
2903
c860c6b1 2904static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
9a812198
JV
2905 struct nlattr *nla, int full_entry)
2906{
2907 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2908 struct nlattr *nla_addr, *nla_port;
2909
2910 /* Parse mandatory identifying destination fields first */
2911 if (nla == NULL ||
2912 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2913 return -EINVAL;
2914
2915 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
2916 nla_port = attrs[IPVS_DEST_ATTR_PORT];
2917
2918 if (!(nla_addr && nla_port))
2919 return -EINVAL;
2920
2921 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2922 udest->port = nla_get_u16(nla_port);
2923
2924 /* If a full entry was requested, check for the additional fields */
2925 if (full_entry) {
2926 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2927 *nla_l_thresh;
2928
2929 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
2930 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
2931 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
2932 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
2933
2934 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
2935 return -EINVAL;
2936
2937 udest->conn_flags = nla_get_u32(nla_fwd)
2938 & IP_VS_CONN_F_FWD_MASK;
2939 udest->weight = nla_get_u32(nla_weight);
2940 udest->u_threshold = nla_get_u32(nla_u_thresh);
2941 udest->l_threshold = nla_get_u32(nla_l_thresh);
2942 }
2943
2944 return 0;
2945}
2946
/*
 * Append a nested IPVS_CMD_ATTR_DAEMON attribute to @skb carrying the
 * sync daemon's state, multicast interface name and sync id.
 *
 * Returns 0 on success or -EMSGSIZE if the message has no room, in which
 * case the partially written nest is cancelled.
 */
static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
				  const char *mcast_ifn, __be32 syncid)
{
	struct nlattr *nl_daemon;

	nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
	if (!nl_daemon)
		return -EMSGSIZE;

	/* The NLA_PUT* macros jump to nla_put_failure when skb is full. */
	NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
	NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
	NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);

	nla_nest_end(skb, nl_daemon);

	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nl_daemon);
	return -EMSGSIZE;
}
2968
2969static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
2970 const char *mcast_ifn, __be32 syncid,
2971 struct netlink_callback *cb)
2972{
2973 void *hdr;
2974 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2975 &ip_vs_genl_family, NLM_F_MULTI,
2976 IPVS_CMD_NEW_DAEMON);
2977 if (!hdr)
2978 return -EMSGSIZE;
2979
2980 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
2981 goto nla_put_failure;
2982
2983 return genlmsg_end(skb, hdr);
2984
2985nla_put_failure:
2986 genlmsg_cancel(skb, hdr);
2987 return -EMSGSIZE;
2988}
2989
2990static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
2991 struct netlink_callback *cb)
2992{
2993 mutex_lock(&__ip_vs_mutex);
2994 if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
2995 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
2996 ip_vs_master_mcast_ifn,
2997 ip_vs_master_syncid, cb) < 0)
2998 goto nla_put_failure;
2999
3000 cb->args[0] = 1;
3001 }
3002
3003 if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3004 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3005 ip_vs_backup_mcast_ifn,
3006 ip_vs_backup_syncid, cb) < 0)
3007 goto nla_put_failure;
3008
3009 cb->args[1] = 1;
3010 }
3011
3012nla_put_failure:
3013 mutex_unlock(&__ip_vs_mutex);
3014
3015 return skb->len;
3016}
3017
3018static int ip_vs_genl_new_daemon(struct nlattr **attrs)
3019{
3020 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3021 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3022 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3023 return -EINVAL;
3024
3025 return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3026 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3027 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3028}
3029
3030static int ip_vs_genl_del_daemon(struct nlattr **attrs)
3031{
3032 if (!attrs[IPVS_DAEMON_ATTR_STATE])
3033 return -EINVAL;
3034
3035 return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3036}
3037
3038static int ip_vs_genl_set_config(struct nlattr **attrs)
3039{
3040 struct ip_vs_timeout_user t;
3041
3042 __ip_vs_get_timeouts(&t);
3043
3044 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3045 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3046
3047 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3048 t.tcp_fin_timeout =
3049 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3050
3051 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3052 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3053
3054 return ip_vs_set_timeout(&t);
3055}
3056
/*
 * Generic Netlink "doit" handler for every state-changing IPVS command
 * (service / destination add, edit, delete; daemon start / stop; zero;
 * flush; set config).
 *
 * The whole command runs under __ip_vs_mutex.  Commands that do not need
 * a service (flush, set config, daemon control, zero-all) are handled
 * first; everything else parses the service attribute, looks the service
 * up and dispatches on the command.
 *
 * Returns 0 on success or a negative errno, including -EINVAL for bad
 * attributes or an unknown command, -ESRCH if a required service does
 * not exist and -EEXIST when adding a service that already exists.
 */
static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
{
	struct ip_vs_service *svc = NULL;
	struct ip_vs_service_user_kern usvc;
	struct ip_vs_dest_user_kern udest;
	int ret = 0, cmd;
	int need_full_svc = 0, need_full_dest = 0;

	cmd = info->genlhdr->cmd;

	mutex_lock(&__ip_vs_mutex);

	/* Commands that work without a service argument. */
	if (cmd == IPVS_CMD_FLUSH) {
		ret = ip_vs_flush();
		goto out;
	} else if (cmd == IPVS_CMD_SET_CONFIG) {
		ret = ip_vs_genl_set_config(info->attrs);
		goto out;
	} else if (cmd == IPVS_CMD_NEW_DAEMON ||
		   cmd == IPVS_CMD_DEL_DAEMON) {

		struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];

		if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
		    nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
				     info->attrs[IPVS_CMD_ATTR_DAEMON],
				     ip_vs_daemon_policy)) {
			ret = -EINVAL;
			goto out;
		}

		if (cmd == IPVS_CMD_NEW_DAEMON)
			ret = ip_vs_genl_new_daemon(daemon_attrs);
		else
			ret = ip_vs_genl_del_daemon(daemon_attrs);
		goto out;
	} else if (cmd == IPVS_CMD_ZERO &&
		   !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
		/* Zero without a service means "zero everything". */
		ret = ip_vs_zero_all();
		goto out;
	}

	/* All following commands require a service argument, so check if we
	 * received a valid one. We need a full service specification when
	 * adding / editing a service. Only identifying members otherwise. */
	if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
		need_full_svc = 1;

	ret = ip_vs_genl_parse_service(&usvc,
				       info->attrs[IPVS_CMD_ATTR_SERVICE],
				       need_full_svc);
	if (ret)
		goto out;

	/* Lookup the exact service by <protocol, addr, port> or fwmark */
	if (usvc.fwmark == 0)
		svc = __ip_vs_service_get(usvc.af, usvc.protocol,
					  &usvc.addr, usvc.port);
	else
		svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);

	/* Unless we're adding a new service, the service must already exist */
	if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
		ret = -ESRCH;
		goto out;
	}

	/* Destination commands require a valid destination argument. For
	 * adding / editing a destination, we need a full destination
	 * specification. */
	if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
	    cmd == IPVS_CMD_DEL_DEST) {
		if (cmd != IPVS_CMD_DEL_DEST)
			need_full_dest = 1;

		ret = ip_vs_genl_parse_dest(&udest,
					    info->attrs[IPVS_CMD_ATTR_DEST],
					    need_full_dest);
		if (ret)
			goto out;
	}

	switch (cmd) {
	case IPVS_CMD_NEW_SERVICE:
		if (svc == NULL)
			ret = ip_vs_add_service(&usvc, &svc);
		else
			ret = -EEXIST;
		break;
	case IPVS_CMD_SET_SERVICE:
		ret = ip_vs_edit_service(svc, &usvc);
		break;
	case IPVS_CMD_DEL_SERVICE:
		/*
		 * NOTE(review): svc is still passed to ip_vs_service_put()
		 * at "out" after a successful delete; confirm that
		 * ip_vs_del_service() leaves the object valid for that.
		 */
		ret = ip_vs_del_service(svc);
		break;
	case IPVS_CMD_NEW_DEST:
		ret = ip_vs_add_dest(svc, &udest);
		break;
	case IPVS_CMD_SET_DEST:
		ret = ip_vs_edit_dest(svc, &udest);
		break;
	case IPVS_CMD_DEL_DEST:
		ret = ip_vs_del_dest(svc, &udest);
		break;
	case IPVS_CMD_ZERO:
		ret = ip_vs_zero_service(svc);
		break;
	default:
		ret = -EINVAL;
	}

out:
	/* svc is NULL on every path that never looked a service up. */
	if (svc)
		ip_vs_service_put(svc);
	mutex_unlock(&__ip_vs_mutex);

	return ret;
}
3175
/*
 * Generic Netlink "doit" handler for the non-dump query commands
 * IPVS_CMD_GET_SERVICE, IPVS_CMD_GET_INFO and IPVS_CMD_GET_CONFIG.
 *
 * Builds a single reply message (using the matching NEW_SERVICE /
 * SET_INFO / SET_CONFIG command id) and unicasts it to the requester.
 *
 * Returns the result of genlmsg_unicast() on success, or a negative
 * errno: -EINVAL for an unknown command, -ENOMEM if the reply cannot be
 * allocated, -EMSGSIZE if it does not fit, -ESRCH if the requested
 * service does not exist, or the parse error for a bad service
 * attribute.
 */
static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *msg;
	void *reply;
	int ret, cmd, reply_cmd;

	cmd = info->genlhdr->cmd;

	/* Map the request to the command id used in the reply. */
	if (cmd == IPVS_CMD_GET_SERVICE)
		reply_cmd = IPVS_CMD_NEW_SERVICE;
	else if (cmd == IPVS_CMD_GET_INFO)
		reply_cmd = IPVS_CMD_SET_INFO;
	else if (cmd == IPVS_CMD_GET_CONFIG)
		reply_cmd = IPVS_CMD_SET_CONFIG;
	else {
		IP_VS_ERR("unknown Generic Netlink command\n");
		return -EINVAL;
	}

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	mutex_lock(&__ip_vs_mutex);

	reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
	if (reply == NULL)
		goto nla_put_failure;

	switch (cmd) {
	case IPVS_CMD_GET_SERVICE:
	{
		struct ip_vs_service *svc;

		svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
		if (IS_ERR(svc)) {
			ret = PTR_ERR(svc);
			goto out_err;
		} else if (svc) {
			ret = ip_vs_genl_fill_service(msg, svc);
			ip_vs_service_put(svc);
			if (ret)
				goto nla_put_failure;
		} else {
			ret = -ESRCH;
			goto out_err;
		}

		break;
	}

	case IPVS_CMD_GET_CONFIG:
	{
		struct ip_vs_timeout_user t;

		__ip_vs_get_timeouts(&t);
		/* The NLA_PUT_U32 macros jump to nla_put_failure on overflow. */
#ifdef CONFIG_IP_VS_PROTO_TCP
		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
			    t.tcp_fin_timeout);
#endif
#ifdef CONFIG_IP_VS_PROTO_UDP
		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
#endif

		break;
	}

	case IPVS_CMD_GET_INFO:
		NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
		NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
			    IP_VS_CONN_TAB_SIZE);
		break;
	}

	genlmsg_end(msg, reply);
	ret = genlmsg_unicast(msg, info->snd_pid);
	/* The success path deliberately skips nlmsg_free() below. */
	goto out;

nla_put_failure:
	IP_VS_ERR("not enough space in Netlink message\n");
	ret = -EMSGSIZE;

out_err:
	/* Error paths still own the reply buffer and must release it. */
	nlmsg_free(msg);
out:
	mutex_unlock(&__ip_vs_mutex);

	return ret;
}
3266
3267
/*
 * Generic Netlink operations of the IPVS family, registered one by one
 * in ip_vs_genl_register().
 *
 * Every command requires GENL_ADMIN_PERM.  State-changing commands are
 * routed to ip_vs_genl_set_cmd(), single-reply queries to
 * ip_vs_genl_get_cmd(), and listings to the respective dump callback.
 * IPVS_CMD_GET_SERVICE supports both a single reply and a dump.
 */
static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
	{
		.cmd	= IPVS_CMD_NEW_SERVICE,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_SET_SERVICE,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_DEL_SERVICE,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_GET_SERVICE,
		.flags	= GENL_ADMIN_PERM,
		.doit	= ip_vs_genl_get_cmd,
		.dumpit	= ip_vs_genl_dump_services,
		.policy	= ip_vs_cmd_policy,
	},
	{
		.cmd	= IPVS_CMD_NEW_DEST,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_SET_DEST,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_DEL_DEST,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		/* Dump only: destinations are always listed per service. */
		.cmd	= IPVS_CMD_GET_DEST,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.dumpit	= ip_vs_genl_dump_dests,
	},
	{
		.cmd	= IPVS_CMD_NEW_DAEMON,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_DEL_DAEMON,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		/* Dump only, takes no attributes (no policy set). */
		.cmd	= IPVS_CMD_GET_DAEMON,
		.flags	= GENL_ADMIN_PERM,
		.dumpit	= ip_vs_genl_dump_daemons,
	},
	{
		.cmd	= IPVS_CMD_SET_CONFIG,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_GET_CONFIG,
		.flags	= GENL_ADMIN_PERM,
		.doit	= ip_vs_genl_get_cmd,
	},
	{
		.cmd	= IPVS_CMD_GET_INFO,
		.flags	= GENL_ADMIN_PERM,
		.doit	= ip_vs_genl_get_cmd,
	},
	{
		.cmd	= IPVS_CMD_ZERO,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_FLUSH,
		.flags	= GENL_ADMIN_PERM,
		.doit	= ip_vs_genl_set_cmd,
	},
};
3363
3364static int __init ip_vs_genl_register(void)
3365{
3366 int ret, i;
3367
3368 ret = genl_register_family(&ip_vs_genl_family);
3369 if (ret)
3370 return ret;
3371
3372 for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
3373 ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
3374 if (ret)
3375 goto err_out;
3376 }
3377 return 0;
3378
3379err_out:
3380 genl_unregister_family(&ip_vs_genl_family);
3381 return ret;
3382}
3383
/* Unregister the IPVS Generic Netlink family. */
static void ip_vs_genl_unregister(void)
{
	genl_unregister_family(&ip_vs_genl_family);
}
3388
3389/* End of Generic Netlink interface definitions */
3390
1da177e4 3391
048cf48b 3392int __init ip_vs_control_init(void)
1da177e4
LT
3393{
3394 int ret;
3395 int idx;
3396
3397 EnterFunction(2);
3398
3399 ret = nf_register_sockopt(&ip_vs_sockopts);
3400 if (ret) {
3401 IP_VS_ERR("cannot register sockopt.\n");
3402 return ret;
3403 }
3404
9a812198
JV
3405 ret = ip_vs_genl_register();
3406 if (ret) {
3407 IP_VS_ERR("cannot register Generic Netlink interface.\n");
3408 nf_unregister_sockopt(&ip_vs_sockopts);
3409 return ret;
3410 }
3411
457c4cbc
EB
3412 proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
3413 proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
1da177e4 3414
90754f8e 3415 sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
1da177e4
LT
3416
3417 /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3418 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3419 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3420 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3421 }
3422 for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) {
3423 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3424 }
3425
1da177e4
LT
3426 ip_vs_new_estimator(&ip_vs_stats);
3427
3428 /* Hook the defense timer */
3429 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3430
3431 LeaveFunction(2);
3432 return 0;
3433}
3434
3435
/*
 * Tear down what ip_vs_control_init() set up: stop the defense work and
 * the global estimator, then remove the sysctl, /proc, Generic Netlink
 * and sockopt interfaces.
 */
void ip_vs_control_cleanup(void)
{
	EnterFunction(2);
	ip_vs_trash_cleanup();
	/* Stop the self-rearming defense timer and wait for it to finish. */
	cancel_rearming_delayed_work(&defense_work);
	cancel_work_sync(&defense_work.work);
	ip_vs_kill_estimator(&ip_vs_stats);
	unregister_sysctl_table(sysctl_header);
	proc_net_remove(&init_net, "ip_vs_stats");
	proc_net_remove(&init_net, "ip_vs");
	ip_vs_genl_unregister();
	nf_unregister_sockopt(&ip_vs_sockopts);
	LeaveFunction(2);
}
3449}