]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/ipvs/ip_vs_ctl.c
ipvs: Mark tcp/udp v4 and v6 debug functions static
[net-next-2.6.git] / net / ipv4 / ipvs / ip_vs_ctl.c
CommitLineData
1da177e4
LT
1/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
1da177e4
LT
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
21#include <linux/module.h>
22#include <linux/init.h>
23#include <linux/types.h>
4fc268d2 24#include <linux/capability.h>
1da177e4
LT
25#include <linux/fs.h>
26#include <linux/sysctl.h>
27#include <linux/proc_fs.h>
28#include <linux/workqueue.h>
29#include <linux/swap.h>
1da177e4
LT
30#include <linux/seq_file.h>
31
32#include <linux/netfilter.h>
33#include <linux/netfilter_ipv4.h>
14cc3e2b 34#include <linux/mutex.h>
1da177e4 35
457c4cbc 36#include <net/net_namespace.h>
1da177e4 37#include <net/ip.h>
09571c7a
VB
38#ifdef CONFIG_IP_VS_IPV6
39#include <net/ipv6.h>
40#include <net/ip6_route.h>
41#endif
14c85021 42#include <net/route.h>
1da177e4 43#include <net/sock.h>
9a812198 44#include <net/genetlink.h>
1da177e4
LT
45
46#include <asm/uaccess.h>
47
48#include <net/ip_vs.h>
49
50/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
14cc3e2b 51static DEFINE_MUTEX(__ip_vs_mutex);
1da177e4
LT
52
53/* lock for service table */
54static DEFINE_RWLOCK(__ip_vs_svc_lock);
55
56/* lock for table with the real services */
57static DEFINE_RWLOCK(__ip_vs_rs_lock);
58
59/* lock for state and timeout tables */
60static DEFINE_RWLOCK(__ip_vs_securetcp_lock);
61
62/* lock for drop entry handling */
63static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
64
65/* lock for drop packet handling */
66static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
67
68/* 1/rate drop and drop-entry variables */
69int ip_vs_drop_rate = 0;
70int ip_vs_drop_counter = 0;
71static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
72
73/* number of virtual services */
74static int ip_vs_num_services = 0;
75
76/* sysctl variables */
77static int sysctl_ip_vs_drop_entry = 0;
78static int sysctl_ip_vs_drop_packet = 0;
79static int sysctl_ip_vs_secure_tcp = 0;
80static int sysctl_ip_vs_amemthresh = 1024;
81static int sysctl_ip_vs_am_droprate = 10;
82int sysctl_ip_vs_cache_bypass = 0;
83int sysctl_ip_vs_expire_nodest_conn = 0;
84int sysctl_ip_vs_expire_quiescent_template = 0;
85int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
86int sysctl_ip_vs_nat_icmp_send = 0;
87
88
#ifdef CONFIG_IP_VS_DEBUG
/* Debug level variable; static storage, so it starts at 0. */
static int sysctl_ip_vs_debug_level;

/* Return the current value of sysctl_ip_vs_debug_level. */
int ip_vs_get_debug_level(void)
{
	int level = sysctl_ip_vs_debug_level;

	return level;
}
#endif
97
09571c7a
VB
#ifdef CONFIG_IP_VS_IPV6
/*
 *	Decide whether an IPv6 address is local by doing a route lookup in
 *	init_net and checking whether the output device of the resulting
 *	route is a loopback device.
 *
 *	Returns 1 if the address is considered local, 0 otherwise.
 *
 *	Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way?
 */
static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
{
	struct dst_entry *dst;
	struct rt6_info *rt;
	int is_local;
	struct flowi fl = {
		.oif = 0,
		.nl_u = {
			.ip6_u = {
				.daddr = *addr,
				.saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
	};

	dst = ip6_route_output(&init_net, NULL, &fl);
	rt = (struct rt6_info *)dst;

	/*
	 * A lookup that failed still hands back a dst (with ->error set);
	 * such an entry must not be mistaken for a local route.
	 */
	is_local = rt && !dst->error && rt->rt6i_dev &&
		   (rt->rt6i_dev->flags & IFF_LOOPBACK);

	/* the lookup returned a referenced dst: drop it so it does not leak */
	dst_release(dst);

	return is_local;
}
#endif
1da177e4 118/*
af9debd4
JA
119 * update_defense_level is called from keventd and from sysctl,
120 * so it needs to protect itself from softirqs
1da177e4
LT
121 */
122static void update_defense_level(void)
123{
124 struct sysinfo i;
125 static int old_secure_tcp = 0;
126 int availmem;
127 int nomem;
128 int to_change = -1;
129
130 /* we only count free and buffered memory (in pages) */
131 si_meminfo(&i);
132 availmem = i.freeram + i.bufferram;
133 /* however in linux 2.5 the i.bufferram is total page cache size,
134 we need adjust it */
135 /* si_swapinfo(&i); */
136 /* availmem = availmem - (i.totalswap - i.freeswap); */
137
138 nomem = (availmem < sysctl_ip_vs_amemthresh);
139
af9debd4
JA
140 local_bh_disable();
141
1da177e4
LT
142 /* drop_entry */
143 spin_lock(&__ip_vs_dropentry_lock);
144 switch (sysctl_ip_vs_drop_entry) {
145 case 0:
146 atomic_set(&ip_vs_dropentry, 0);
147 break;
148 case 1:
149 if (nomem) {
150 atomic_set(&ip_vs_dropentry, 1);
151 sysctl_ip_vs_drop_entry = 2;
152 } else {
153 atomic_set(&ip_vs_dropentry, 0);
154 }
155 break;
156 case 2:
157 if (nomem) {
158 atomic_set(&ip_vs_dropentry, 1);
159 } else {
160 atomic_set(&ip_vs_dropentry, 0);
161 sysctl_ip_vs_drop_entry = 1;
162 };
163 break;
164 case 3:
165 atomic_set(&ip_vs_dropentry, 1);
166 break;
167 }
168 spin_unlock(&__ip_vs_dropentry_lock);
169
170 /* drop_packet */
171 spin_lock(&__ip_vs_droppacket_lock);
172 switch (sysctl_ip_vs_drop_packet) {
173 case 0:
174 ip_vs_drop_rate = 0;
175 break;
176 case 1:
177 if (nomem) {
178 ip_vs_drop_rate = ip_vs_drop_counter
179 = sysctl_ip_vs_amemthresh /
180 (sysctl_ip_vs_amemthresh-availmem);
181 sysctl_ip_vs_drop_packet = 2;
182 } else {
183 ip_vs_drop_rate = 0;
184 }
185 break;
186 case 2:
187 if (nomem) {
188 ip_vs_drop_rate = ip_vs_drop_counter
189 = sysctl_ip_vs_amemthresh /
190 (sysctl_ip_vs_amemthresh-availmem);
191 } else {
192 ip_vs_drop_rate = 0;
193 sysctl_ip_vs_drop_packet = 1;
194 }
195 break;
196 case 3:
197 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
198 break;
199 }
200 spin_unlock(&__ip_vs_droppacket_lock);
201
202 /* secure_tcp */
203 write_lock(&__ip_vs_securetcp_lock);
204 switch (sysctl_ip_vs_secure_tcp) {
205 case 0:
206 if (old_secure_tcp >= 2)
207 to_change = 0;
208 break;
209 case 1:
210 if (nomem) {
211 if (old_secure_tcp < 2)
212 to_change = 1;
213 sysctl_ip_vs_secure_tcp = 2;
214 } else {
215 if (old_secure_tcp >= 2)
216 to_change = 0;
217 }
218 break;
219 case 2:
220 if (nomem) {
221 if (old_secure_tcp < 2)
222 to_change = 1;
223 } else {
224 if (old_secure_tcp >= 2)
225 to_change = 0;
226 sysctl_ip_vs_secure_tcp = 1;
227 }
228 break;
229 case 3:
230 if (old_secure_tcp < 2)
231 to_change = 1;
232 break;
233 }
234 old_secure_tcp = sysctl_ip_vs_secure_tcp;
235 if (to_change >= 0)
236 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
237 write_unlock(&__ip_vs_securetcp_lock);
af9debd4
JA
238
239 local_bh_enable();
1da177e4
LT
240}
241
242
243/*
244 * Timer for checking the defense
245 */
246#define DEFENSE_TIMER_PERIOD 1*HZ
c4028958
DH
247static void defense_work_handler(struct work_struct *work);
248static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
1da177e4 249
c4028958 250static void defense_work_handler(struct work_struct *work)
1da177e4
LT
251{
252 update_defense_level();
253 if (atomic_read(&ip_vs_dropentry))
254 ip_vs_random_dropentry();
255
256 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
257}
258
259int
260ip_vs_use_count_inc(void)
261{
262 return try_module_get(THIS_MODULE);
263}
264
265void
266ip_vs_use_count_dec(void)
267{
268 module_put(THIS_MODULE);
269}
270
271
272/*
273 * Hash table: for virtual service lookups
274 */
275#define IP_VS_SVC_TAB_BITS 8
276#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
277#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
278
279/* the service table hashed by <protocol, addr, port> */
280static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
281/* the service table hashed by fwmark */
282static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
283
284/*
285 * Hash table: for real service lookups
286 */
287#define IP_VS_RTAB_BITS 4
288#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
289#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
290
291static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
292
293/*
294 * Trash for destinations
295 */
296static LIST_HEAD(ip_vs_dest_trash);
297
298/*
299 * FTP & NULL virtual service counters
300 */
301static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
302static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
303
304
305/*
306 * Returns hash value for virtual service
307 */
308static __inline__ unsigned
b18610de
JV
309ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
310 __be16 port)
1da177e4
LT
311{
312 register unsigned porth = ntohs(port);
b18610de 313 __be32 addr_fold = addr->ip;
1da177e4 314
b18610de
JV
315#ifdef CONFIG_IP_VS_IPV6
316 if (af == AF_INET6)
317 addr_fold = addr->ip6[0]^addr->ip6[1]^
318 addr->ip6[2]^addr->ip6[3];
319#endif
320
321 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
1da177e4
LT
322 & IP_VS_SVC_TAB_MASK;
323}
324
325/*
326 * Returns hash value of fwmark for virtual service lookup
327 */
328static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
329{
330 return fwmark & IP_VS_SVC_TAB_MASK;
331}
332
333/*
334 * Hashes a service in the ip_vs_svc_table by <proto,addr,port>
335 * or in the ip_vs_svc_fwm_table by fwmark.
336 * Should be called with locked tables.
337 */
338static int ip_vs_svc_hash(struct ip_vs_service *svc)
339{
340 unsigned hash;
341
342 if (svc->flags & IP_VS_SVC_F_HASHED) {
343 IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
344 "called from %p\n", __builtin_return_address(0));
345 return 0;
346 }
347
348 if (svc->fwmark == 0) {
349 /*
350 * Hash it by <protocol,addr,port> in ip_vs_svc_table
351 */
b18610de 352 hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
e7ade46a 353 svc->port);
1da177e4
LT
354 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
355 } else {
356 /*
357 * Hash it by fwmark in ip_vs_svc_fwm_table
358 */
359 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
360 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
361 }
362
363 svc->flags |= IP_VS_SVC_F_HASHED;
364 /* increase its refcnt because it is referenced by the svc table */
365 atomic_inc(&svc->refcnt);
366 return 1;
367}
368
369
370/*
371 * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
372 * Should be called with locked tables.
373 */
374static int ip_vs_svc_unhash(struct ip_vs_service *svc)
375{
376 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
377 IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
378 "called from %p\n", __builtin_return_address(0));
379 return 0;
380 }
381
382 if (svc->fwmark == 0) {
383 /* Remove it from the ip_vs_svc_table table */
384 list_del(&svc->s_list);
385 } else {
386 /* Remove it from the ip_vs_svc_fwm_table table */
387 list_del(&svc->f_list);
388 }
389
390 svc->flags &= ~IP_VS_SVC_F_HASHED;
391 atomic_dec(&svc->refcnt);
392 return 1;
393}
394
395
396/*
397 * Get service by {proto,addr,port} in the service table.
398 */
b18610de
JV
399static inline struct ip_vs_service *
400__ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
401 __be16 vport)
1da177e4
LT
402{
403 unsigned hash;
404 struct ip_vs_service *svc;
405
406 /* Check for "full" addressed entries */
b18610de 407 hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
1da177e4
LT
408
409 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
b18610de
JV
410 if ((svc->af == af)
411 && ip_vs_addr_equal(af, &svc->addr, vaddr)
1da177e4
LT
412 && (svc->port == vport)
413 && (svc->protocol == protocol)) {
414 /* HIT */
415 atomic_inc(&svc->usecnt);
416 return svc;
417 }
418 }
419
420 return NULL;
421}
422
423
424/*
425 * Get service by {fwmark} in the service table.
426 */
b18610de
JV
427static inline struct ip_vs_service *
428__ip_vs_svc_fwm_get(int af, __u32 fwmark)
1da177e4
LT
429{
430 unsigned hash;
431 struct ip_vs_service *svc;
432
433 /* Check for fwmark addressed entries */
434 hash = ip_vs_svc_fwm_hashkey(fwmark);
435
436 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
b18610de 437 if (svc->fwmark == fwmark && svc->af == af) {
1da177e4
LT
438 /* HIT */
439 atomic_inc(&svc->usecnt);
440 return svc;
441 }
442 }
443
444 return NULL;
445}
446
447struct ip_vs_service *
3c2e0505
JV
448ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
449 const union nf_inet_addr *vaddr, __be16 vport)
1da177e4
LT
450{
451 struct ip_vs_service *svc;
3c2e0505 452
1da177e4
LT
453 read_lock(&__ip_vs_svc_lock);
454
455 /*
456 * Check the table hashed by fwmark first
457 */
3c2e0505 458 if (fwmark && (svc = __ip_vs_svc_fwm_get(af, fwmark)))
1da177e4
LT
459 goto out;
460
461 /*
462 * Check the table hashed by <protocol,addr,port>
463 * for "full" addressed entries
464 */
3c2e0505 465 svc = __ip_vs_service_get(af, protocol, vaddr, vport);
1da177e4
LT
466
467 if (svc == NULL
468 && protocol == IPPROTO_TCP
469 && atomic_read(&ip_vs_ftpsvc_counter)
470 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
471 /*
472 * Check if ftp service entry exists, the packet
473 * might belong to FTP data connections.
474 */
3c2e0505 475 svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT);
1da177e4
LT
476 }
477
478 if (svc == NULL
479 && atomic_read(&ip_vs_nullsvc_counter)) {
480 /*
481 * Check if the catch-all port (port zero) exists
482 */
3c2e0505 483 svc = __ip_vs_service_get(af, protocol, vaddr, 0);
1da177e4
LT
484 }
485
486 out:
487 read_unlock(&__ip_vs_svc_lock);
488
3c2e0505
JV
489 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
490 fwmark, ip_vs_proto_name(protocol),
491 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
492 svc ? "hit" : "not hit");
1da177e4
LT
493
494 return svc;
495}
496
497
498static inline void
499__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
500{
501 atomic_inc(&svc->refcnt);
502 dest->svc = svc;
503}
504
505static inline void
506__ip_vs_unbind_svc(struct ip_vs_dest *dest)
507{
508 struct ip_vs_service *svc = dest->svc;
509
510 dest->svc = NULL;
511 if (atomic_dec_and_test(&svc->refcnt))
512 kfree(svc);
513}
514
515
516/*
517 * Returns hash value for real service
518 */
7937df15
JV
519static inline unsigned ip_vs_rs_hashkey(int af,
520 const union nf_inet_addr *addr,
521 __be16 port)
1da177e4
LT
522{
523 register unsigned porth = ntohs(port);
7937df15
JV
524 __be32 addr_fold = addr->ip;
525
526#ifdef CONFIG_IP_VS_IPV6
527 if (af == AF_INET6)
528 addr_fold = addr->ip6[0]^addr->ip6[1]^
529 addr->ip6[2]^addr->ip6[3];
530#endif
1da177e4 531
7937df15 532 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
1da177e4
LT
533 & IP_VS_RTAB_MASK;
534}
535
536/*
537 * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
538 * should be called with locked tables.
539 */
540static int ip_vs_rs_hash(struct ip_vs_dest *dest)
541{
542 unsigned hash;
543
544 if (!list_empty(&dest->d_list)) {
545 return 0;
546 }
547
548 /*
549 * Hash by proto,addr,port,
550 * which are the parameters of the real service.
551 */
7937df15
JV
552 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
553
1da177e4
LT
554 list_add(&dest->d_list, &ip_vs_rtable[hash]);
555
556 return 1;
557}
558
559/*
560 * UNhashes ip_vs_dest from ip_vs_rtable.
561 * should be called with locked tables.
562 */
563static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
564{
565 /*
566 * Remove it from the ip_vs_rtable table.
567 */
568 if (!list_empty(&dest->d_list)) {
569 list_del(&dest->d_list);
570 INIT_LIST_HEAD(&dest->d_list);
571 }
572
573 return 1;
574}
575
576/*
577 * Lookup real service by <proto,addr,port> in the real service table.
578 */
579struct ip_vs_dest *
7937df15
JV
580ip_vs_lookup_real_service(int af, __u16 protocol,
581 const union nf_inet_addr *daddr,
582 __be16 dport)
1da177e4
LT
583{
584 unsigned hash;
585 struct ip_vs_dest *dest;
586
587 /*
588 * Check for "full" addressed entries
589 * Return the first found entry
590 */
7937df15 591 hash = ip_vs_rs_hashkey(af, daddr, dport);
1da177e4
LT
592
593 read_lock(&__ip_vs_rs_lock);
594 list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
7937df15
JV
595 if ((dest->af == af)
596 && ip_vs_addr_equal(af, &dest->addr, daddr)
1da177e4
LT
597 && (dest->port == dport)
598 && ((dest->protocol == protocol) ||
599 dest->vfwmark)) {
600 /* HIT */
601 read_unlock(&__ip_vs_rs_lock);
602 return dest;
603 }
604 }
605 read_unlock(&__ip_vs_rs_lock);
606
607 return NULL;
608}
609
610/*
611 * Lookup destination by {addr,port} in the given service
612 */
613static struct ip_vs_dest *
7937df15
JV
614ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
615 __be16 dport)
1da177e4
LT
616{
617 struct ip_vs_dest *dest;
618
619 /*
620 * Find the destination for the given service
621 */
622 list_for_each_entry(dest, &svc->destinations, n_list) {
7937df15
JV
623 if ((dest->af == svc->af)
624 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
625 && (dest->port == dport)) {
1da177e4
LT
626 /* HIT */
627 return dest;
628 }
629 }
630
631 return NULL;
632}
633
1e356f9c
RB
634/*
635 * Find destination by {daddr,dport,vaddr,protocol}
636 * Cretaed to be used in ip_vs_process_message() in
637 * the backup synchronization daemon. It finds the
638 * destination to be bound to the received connection
639 * on the backup.
640 *
641 * ip_vs_lookup_real_service() looked promissing, but
642 * seems not working as expected.
643 */
7937df15
JV
644struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
645 __be16 dport,
646 const union nf_inet_addr *vaddr,
647 __be16 vport, __u16 protocol)
1e356f9c
RB
648{
649 struct ip_vs_dest *dest;
650 struct ip_vs_service *svc;
651
7937df15 652 svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
1e356f9c
RB
653 if (!svc)
654 return NULL;
655 dest = ip_vs_lookup_dest(svc, daddr, dport);
656 if (dest)
657 atomic_inc(&dest->refcnt);
658 ip_vs_service_put(svc);
659 return dest;
660}
1da177e4
LT
661
662/*
663 * Lookup dest by {svc,addr,port} in the destination trash.
664 * The destination trash is used to hold the destinations that are removed
665 * from the service table but are still referenced by some conn entries.
666 * The reason to add the destination trash is when the dest is temporary
667 * down (either by administrator or by monitor program), the dest can be
668 * picked back from the trash, the remaining connections to the dest can
669 * continue, and the counting information of the dest is also useful for
670 * scheduling.
671 */
672static struct ip_vs_dest *
7937df15
JV
673ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
674 __be16 dport)
1da177e4
LT
675{
676 struct ip_vs_dest *dest, *nxt;
677
678 /*
679 * Find the destination in trash
680 */
681 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
7937df15
JV
682 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
683 "dest->refcnt=%d\n",
684 dest->vfwmark,
685 IP_VS_DBG_ADDR(svc->af, &dest->addr),
686 ntohs(dest->port),
687 atomic_read(&dest->refcnt));
688 if (dest->af == svc->af &&
689 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
1da177e4
LT
690 dest->port == dport &&
691 dest->vfwmark == svc->fwmark &&
692 dest->protocol == svc->protocol &&
693 (svc->fwmark ||
7937df15 694 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
1da177e4
LT
695 dest->vport == svc->port))) {
696 /* HIT */
697 return dest;
698 }
699
700 /*
701 * Try to purge the destination from trash if not referenced
702 */
703 if (atomic_read(&dest->refcnt) == 1) {
7937df15
JV
704 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
705 "from trash\n",
706 dest->vfwmark,
707 IP_VS_DBG_ADDR(svc->af, &dest->addr),
708 ntohs(dest->port));
1da177e4
LT
709 list_del(&dest->n_list);
710 ip_vs_dst_reset(dest);
711 __ip_vs_unbind_svc(dest);
712 kfree(dest);
713 }
714 }
715
716 return NULL;
717}
718
719
720/*
721 * Clean up all the destinations in the trash
722 * Called by the ip_vs_control_cleanup()
723 *
724 * When the ip_vs_control_clearup is activated by ipvs module exit,
725 * the service tables must have been flushed and all the connections
726 * are expired, and the refcnt of each destination in the trash must
727 * be 1, so we simply release them here.
728 */
729static void ip_vs_trash_cleanup(void)
730{
731 struct ip_vs_dest *dest, *nxt;
732
733 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
734 list_del(&dest->n_list);
735 ip_vs_dst_reset(dest);
736 __ip_vs_unbind_svc(dest);
737 kfree(dest);
738 }
739}
740
741
742static void
743ip_vs_zero_stats(struct ip_vs_stats *stats)
744{
745 spin_lock_bh(&stats->lock);
e93615d0
SH
746
747 stats->conns = 0;
748 stats->inpkts = 0;
749 stats->outpkts = 0;
750 stats->inbytes = 0;
751 stats->outbytes = 0;
752
753 stats->cps = 0;
754 stats->inpps = 0;
755 stats->outpps = 0;
756 stats->inbps = 0;
757 stats->outbps = 0;
758
1da177e4 759 ip_vs_zero_estimator(stats);
e93615d0 760
3a14a313 761 spin_unlock_bh(&stats->lock);
1da177e4
LT
762}
763
764/*
765 * Update a destination in the given service
766 */
767static void
768__ip_vs_update_dest(struct ip_vs_service *svc,
c860c6b1 769 struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
770{
771 int conn_flags;
772
773 /* set the weight and the flags */
774 atomic_set(&dest->weight, udest->weight);
775 conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
776
777 /* check if local node and update the flags */
09571c7a
VB
778#ifdef CONFIG_IP_VS_IPV6
779 if (svc->af == AF_INET6) {
780 if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) {
781 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
782 | IP_VS_CONN_F_LOCALNODE;
783 }
784 } else
785#endif
786 if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
787 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
788 | IP_VS_CONN_F_LOCALNODE;
789 }
1da177e4
LT
790
791 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
792 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
793 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
794 } else {
795 /*
796 * Put the real service in ip_vs_rtable if not present.
797 * For now only for NAT!
798 */
799 write_lock_bh(&__ip_vs_rs_lock);
800 ip_vs_rs_hash(dest);
801 write_unlock_bh(&__ip_vs_rs_lock);
802 }
803 atomic_set(&dest->conn_flags, conn_flags);
804
805 /* bind the service */
806 if (!dest->svc) {
807 __ip_vs_bind_svc(dest, svc);
808 } else {
809 if (dest->svc != svc) {
810 __ip_vs_unbind_svc(dest);
811 ip_vs_zero_stats(&dest->stats);
812 __ip_vs_bind_svc(dest, svc);
813 }
814 }
815
816 /* set the dest status flags */
817 dest->flags |= IP_VS_DEST_F_AVAILABLE;
818
819 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
820 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
821 dest->u_threshold = udest->u_threshold;
822 dest->l_threshold = udest->l_threshold;
823}
824
825
826/*
827 * Create a destination for the given service
828 */
829static int
c860c6b1 830ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
1da177e4
LT
831 struct ip_vs_dest **dest_p)
832{
833 struct ip_vs_dest *dest;
834 unsigned atype;
835
836 EnterFunction(2);
837
09571c7a
VB
838#ifdef CONFIG_IP_VS_IPV6
839 if (svc->af == AF_INET6) {
840 atype = ipv6_addr_type(&udest->addr.in6);
841 if (!(atype & IPV6_ADDR_UNICAST) &&
842 !__ip_vs_addr_is_local_v6(&udest->addr.in6))
843 return -EINVAL;
844 } else
845#endif
846 {
847 atype = inet_addr_type(&init_net, udest->addr.ip);
848 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
849 return -EINVAL;
850 }
1da177e4 851
0da974f4 852 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
1da177e4
LT
853 if (dest == NULL) {
854 IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
855 return -ENOMEM;
856 }
1da177e4 857
c860c6b1 858 dest->af = svc->af;
1da177e4 859 dest->protocol = svc->protocol;
c860c6b1 860 dest->vaddr = svc->addr;
1da177e4
LT
861 dest->vport = svc->port;
862 dest->vfwmark = svc->fwmark;
c860c6b1 863 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
1da177e4
LT
864 dest->port = udest->port;
865
866 atomic_set(&dest->activeconns, 0);
867 atomic_set(&dest->inactconns, 0);
868 atomic_set(&dest->persistconns, 0);
869 atomic_set(&dest->refcnt, 0);
870
871 INIT_LIST_HEAD(&dest->d_list);
872 spin_lock_init(&dest->dst_lock);
873 spin_lock_init(&dest->stats.lock);
874 __ip_vs_update_dest(svc, dest, udest);
875 ip_vs_new_estimator(&dest->stats);
876
877 *dest_p = dest;
878
879 LeaveFunction(2);
880 return 0;
881}
882
883
884/*
885 * Add a destination into an existing service
886 */
887static int
c860c6b1 888ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
889{
890 struct ip_vs_dest *dest;
c860c6b1 891 union nf_inet_addr daddr;
014d730d 892 __be16 dport = udest->port;
1da177e4
LT
893 int ret;
894
895 EnterFunction(2);
896
897 if (udest->weight < 0) {
898 IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
899 return -ERANGE;
900 }
901
902 if (udest->l_threshold > udest->u_threshold) {
903 IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than "
904 "upper threshold\n");
905 return -ERANGE;
906 }
907
c860c6b1
JV
908 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
909
1da177e4
LT
910 /*
911 * Check if the dest already exists in the list
912 */
7937df15
JV
913 dest = ip_vs_lookup_dest(svc, &daddr, dport);
914
1da177e4
LT
915 if (dest != NULL) {
916 IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
917 return -EEXIST;
918 }
919
920 /*
921 * Check if the dest already exists in the trash and
922 * is from the same service
923 */
7937df15
JV
924 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
925
1da177e4 926 if (dest != NULL) {
cfc78c5a
JV
927 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
928 "dest->refcnt=%d, service %u/%s:%u\n",
929 IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
930 atomic_read(&dest->refcnt),
931 dest->vfwmark,
932 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
933 ntohs(dest->vport));
934
1da177e4
LT
935 __ip_vs_update_dest(svc, dest, udest);
936
937 /*
938 * Get the destination from the trash
939 */
940 list_del(&dest->n_list);
941
942 ip_vs_new_estimator(&dest->stats);
943
944 write_lock_bh(&__ip_vs_svc_lock);
945
946 /*
947 * Wait until all other svc users go away.
948 */
949 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
950
951 list_add(&dest->n_list, &svc->destinations);
952 svc->num_dests++;
953
954 /* call the update_service function of its scheduler */
82dfb6f3
SW
955 if (svc->scheduler->update_service)
956 svc->scheduler->update_service(svc);
1da177e4
LT
957
958 write_unlock_bh(&__ip_vs_svc_lock);
959 return 0;
960 }
961
962 /*
963 * Allocate and initialize the dest structure
964 */
965 ret = ip_vs_new_dest(svc, udest, &dest);
966 if (ret) {
967 return ret;
968 }
969
970 /*
971 * Add the dest entry into the list
972 */
973 atomic_inc(&dest->refcnt);
974
975 write_lock_bh(&__ip_vs_svc_lock);
976
977 /*
978 * Wait until all other svc users go away.
979 */
980 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
981
982 list_add(&dest->n_list, &svc->destinations);
983 svc->num_dests++;
984
985 /* call the update_service function of its scheduler */
82dfb6f3
SW
986 if (svc->scheduler->update_service)
987 svc->scheduler->update_service(svc);
1da177e4
LT
988
989 write_unlock_bh(&__ip_vs_svc_lock);
990
991 LeaveFunction(2);
992
993 return 0;
994}
995
996
997/*
998 * Edit a destination in the given service
999 */
1000static int
c860c6b1 1001ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
1002{
1003 struct ip_vs_dest *dest;
c860c6b1 1004 union nf_inet_addr daddr;
014d730d 1005 __be16 dport = udest->port;
1da177e4
LT
1006
1007 EnterFunction(2);
1008
1009 if (udest->weight < 0) {
1010 IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
1011 return -ERANGE;
1012 }
1013
1014 if (udest->l_threshold > udest->u_threshold) {
1015 IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than "
1016 "upper threshold\n");
1017 return -ERANGE;
1018 }
1019
c860c6b1
JV
1020 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
1021
1da177e4
LT
1022 /*
1023 * Lookup the destination list
1024 */
7937df15
JV
1025 dest = ip_vs_lookup_dest(svc, &daddr, dport);
1026
1da177e4
LT
1027 if (dest == NULL) {
1028 IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
1029 return -ENOENT;
1030 }
1031
1032 __ip_vs_update_dest(svc, dest, udest);
1033
1034 write_lock_bh(&__ip_vs_svc_lock);
1035
1036 /* Wait until all other svc users go away */
cae7ca3d 1037 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1da177e4
LT
1038
1039 /* call the update_service, because server weight may be changed */
82dfb6f3
SW
1040 if (svc->scheduler->update_service)
1041 svc->scheduler->update_service(svc);
1da177e4
LT
1042
1043 write_unlock_bh(&__ip_vs_svc_lock);
1044
1045 LeaveFunction(2);
1046
1047 return 0;
1048}
1049
1050
1051/*
1052 * Delete a destination (must be already unlinked from the service)
1053 */
1054static void __ip_vs_del_dest(struct ip_vs_dest *dest)
1055{
1056 ip_vs_kill_estimator(&dest->stats);
1057
1058 /*
1059 * Remove it from the d-linked list with the real services.
1060 */
1061 write_lock_bh(&__ip_vs_rs_lock);
1062 ip_vs_rs_unhash(dest);
1063 write_unlock_bh(&__ip_vs_rs_lock);
1064
1065 /*
1066 * Decrease the refcnt of the dest, and free the dest
1067 * if nobody refers to it (refcnt=0). Otherwise, throw
1068 * the destination into the trash.
1069 */
1070 if (atomic_dec_and_test(&dest->refcnt)) {
1071 ip_vs_dst_reset(dest);
1072 /* simply decrease svc->refcnt here, let the caller check
1073 and release the service if nobody refers to it.
1074 Only user context can release destination and service,
1075 and only one user context can update virtual service at a
1076 time, so the operation here is OK */
1077 atomic_dec(&dest->svc->refcnt);
1078 kfree(dest);
1079 } else {
cfc78c5a
JV
1080 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1081 "dest->refcnt=%d\n",
1082 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1083 ntohs(dest->port),
1084 atomic_read(&dest->refcnt));
1da177e4
LT
1085 list_add(&dest->n_list, &ip_vs_dest_trash);
1086 atomic_inc(&dest->refcnt);
1087 }
1088}
1089
1090
1091/*
1092 * Unlink a destination from the given service
1093 */
1094static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1095 struct ip_vs_dest *dest,
1096 int svcupd)
1097{
1098 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1099
1100 /*
1101 * Remove it from the d-linked destination list.
1102 */
1103 list_del(&dest->n_list);
1104 svc->num_dests--;
82dfb6f3
SW
1105
1106 /*
1107 * Call the update_service function of its scheduler
1108 */
1109 if (svcupd && svc->scheduler->update_service)
1110 svc->scheduler->update_service(svc);
1da177e4
LT
1111}
1112
1113
1114/*
1115 * Delete a destination server in the given service
1116 */
1117static int
c860c6b1 1118ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
1119{
1120 struct ip_vs_dest *dest;
014d730d 1121 __be16 dport = udest->port;
1da177e4
LT
1122
1123 EnterFunction(2);
1124
7937df15 1125 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
c860c6b1 1126
1da177e4
LT
1127 if (dest == NULL) {
1128 IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
1129 return -ENOENT;
1130 }
1131
1132 write_lock_bh(&__ip_vs_svc_lock);
1133
1134 /*
1135 * Wait until all other svc users go away.
1136 */
1137 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1138
1139 /*
1140 * Unlink dest from the service
1141 */
1142 __ip_vs_unlink_dest(svc, dest, 1);
1143
1144 write_unlock_bh(&__ip_vs_svc_lock);
1145
1146 /*
1147 * Delete the destination
1148 */
1149 __ip_vs_del_dest(dest);
1150
1151 LeaveFunction(2);
1152
1153 return 0;
1154}
1155
1156
1157/*
1158 * Add a service into the service hash table
1159 */
1160static int
c860c6b1
JV
1161ip_vs_add_service(struct ip_vs_service_user_kern *u,
1162 struct ip_vs_service **svc_p)
1da177e4
LT
1163{
1164 int ret = 0;
1165 struct ip_vs_scheduler *sched = NULL;
1166 struct ip_vs_service *svc = NULL;
1167
1168 /* increase the module use count */
1169 ip_vs_use_count_inc();
1170
1171 /* Lookup the scheduler by 'u->sched_name' */
1172 sched = ip_vs_scheduler_get(u->sched_name);
1173 if (sched == NULL) {
1174 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1175 u->sched_name);
1176 ret = -ENOENT;
1177 goto out_mod_dec;
1178 }
1179
f94fd041
JV
1180#ifdef CONFIG_IP_VS_IPV6
1181 if (u->af == AF_INET6) {
1182 if (!sched->supports_ipv6) {
1183 ret = -EAFNOSUPPORT;
1184 goto out_err;
1185 }
1186 if ((u->netmask < 1) || (u->netmask > 128)) {
1187 ret = -EINVAL;
1188 goto out_err;
1189 }
1190 }
1191#endif
1192
0da974f4 1193 svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
1da177e4
LT
1194 if (svc == NULL) {
1195 IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
1196 ret = -ENOMEM;
1197 goto out_err;
1198 }
1da177e4
LT
1199
1200 /* I'm the first user of the service */
1201 atomic_set(&svc->usecnt, 1);
1202 atomic_set(&svc->refcnt, 0);
1203
c860c6b1 1204 svc->af = u->af;
1da177e4 1205 svc->protocol = u->protocol;
c860c6b1 1206 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1da177e4
LT
1207 svc->port = u->port;
1208 svc->fwmark = u->fwmark;
1209 svc->flags = u->flags;
1210 svc->timeout = u->timeout * HZ;
1211 svc->netmask = u->netmask;
1212
1213 INIT_LIST_HEAD(&svc->destinations);
1214 rwlock_init(&svc->sched_lock);
1215 spin_lock_init(&svc->stats.lock);
1216
1217 /* Bind the scheduler */
1218 ret = ip_vs_bind_scheduler(svc, sched);
1219 if (ret)
1220 goto out_err;
1221 sched = NULL;
1222
1223 /* Update the virtual service counters */
1224 if (svc->port == FTPPORT)
1225 atomic_inc(&ip_vs_ftpsvc_counter);
1226 else if (svc->port == 0)
1227 atomic_inc(&ip_vs_nullsvc_counter);
1228
1229 ip_vs_new_estimator(&svc->stats);
f94fd041
JV
1230
1231 /* Count only IPv4 services for old get/setsockopt interface */
1232 if (svc->af == AF_INET)
1233 ip_vs_num_services++;
1da177e4
LT
1234
1235 /* Hash the service into the service table */
1236 write_lock_bh(&__ip_vs_svc_lock);
1237 ip_vs_svc_hash(svc);
1238 write_unlock_bh(&__ip_vs_svc_lock);
1239
1240 *svc_p = svc;
1241 return 0;
1242
1243 out_err:
1244 if (svc != NULL) {
1245 if (svc->scheduler)
1246 ip_vs_unbind_scheduler(svc);
1247 if (svc->inc) {
1248 local_bh_disable();
1249 ip_vs_app_inc_put(svc->inc);
1250 local_bh_enable();
1251 }
1252 kfree(svc);
1253 }
1254 ip_vs_scheduler_put(sched);
1255
1256 out_mod_dec:
1257 /* decrease the module use count */
1258 ip_vs_use_count_dec();
1259
1260 return ret;
1261}
1262
1263
1264/*
1265 * Edit a service and bind it with a new scheduler
1266 */
1267static int
c860c6b1 1268ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1da177e4
LT
1269{
1270 struct ip_vs_scheduler *sched, *old_sched;
1271 int ret = 0;
1272
1273 /*
1274 * Lookup the scheduler, by 'u->sched_name'
1275 */
1276 sched = ip_vs_scheduler_get(u->sched_name);
1277 if (sched == NULL) {
1278 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1279 u->sched_name);
1280 return -ENOENT;
1281 }
1282 old_sched = sched;
1283
f94fd041
JV
1284#ifdef CONFIG_IP_VS_IPV6
1285 if (u->af == AF_INET6) {
1286 if (!sched->supports_ipv6) {
a5ba4bf2 1287 ret = -EAFNOSUPPORT;
f94fd041
JV
1288 goto out;
1289 }
1290 if ((u->netmask < 1) || (u->netmask > 128)) {
a5ba4bf2 1291 ret = -EINVAL;
f94fd041
JV
1292 goto out;
1293 }
1294 }
1295#endif
1296
1da177e4
LT
1297 write_lock_bh(&__ip_vs_svc_lock);
1298
1299 /*
1300 * Wait until all other svc users go away.
1301 */
1302 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1303
1304 /*
1305 * Set the flags and timeout value
1306 */
1307 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1308 svc->timeout = u->timeout * HZ;
1309 svc->netmask = u->netmask;
1310
1311 old_sched = svc->scheduler;
1312 if (sched != old_sched) {
1313 /*
1314 * Unbind the old scheduler
1315 */
1316 if ((ret = ip_vs_unbind_scheduler(svc))) {
1317 old_sched = sched;
1318 goto out;
1319 }
1320
1321 /*
1322 * Bind the new scheduler
1323 */
1324 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1325 /*
1326 * If ip_vs_bind_scheduler fails, restore the old
1327 * scheduler.
1328 * The main reason of failure is out of memory.
1329 *
1330 * The question is if the old scheduler can be
1331 * restored all the time. TODO: if it cannot be
1332 * restored some time, we must delete the service,
1333 * otherwise the system may crash.
1334 */
1335 ip_vs_bind_scheduler(svc, old_sched);
1336 old_sched = sched;
1337 goto out;
1338 }
1339 }
1340
1341 out:
1342 write_unlock_bh(&__ip_vs_svc_lock);
1343
1344 if (old_sched)
1345 ip_vs_scheduler_put(old_sched);
1346
1347 return ret;
1348}
1349
1350
1351/*
1352 * Delete a service from the service list
1353 * - The service must be unlinked, unlocked and not referenced!
1354 * - We are called under _bh lock
1355 */
1356static void __ip_vs_del_service(struct ip_vs_service *svc)
1357{
1358 struct ip_vs_dest *dest, *nxt;
1359 struct ip_vs_scheduler *old_sched;
1360
f94fd041
JV
1361 /* Count only IPv4 services for old get/setsockopt interface */
1362 if (svc->af == AF_INET)
1363 ip_vs_num_services--;
1364
1da177e4
LT
1365 ip_vs_kill_estimator(&svc->stats);
1366
1367 /* Unbind scheduler */
1368 old_sched = svc->scheduler;
1369 ip_vs_unbind_scheduler(svc);
1370 if (old_sched)
1371 ip_vs_scheduler_put(old_sched);
1372
1373 /* Unbind app inc */
1374 if (svc->inc) {
1375 ip_vs_app_inc_put(svc->inc);
1376 svc->inc = NULL;
1377 }
1378
1379 /*
1380 * Unlink the whole destination list
1381 */
1382 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1383 __ip_vs_unlink_dest(svc, dest, 0);
1384 __ip_vs_del_dest(dest);
1385 }
1386
1387 /*
1388 * Update the virtual service counters
1389 */
1390 if (svc->port == FTPPORT)
1391 atomic_dec(&ip_vs_ftpsvc_counter);
1392 else if (svc->port == 0)
1393 atomic_dec(&ip_vs_nullsvc_counter);
1394
1395 /*
1396 * Free the service if nobody refers to it
1397 */
1398 if (atomic_read(&svc->refcnt) == 0)
1399 kfree(svc);
1400
1401 /* decrease the module use count */
1402 ip_vs_use_count_dec();
1403}
1404
1405/*
1406 * Delete a service from the service list
1407 */
1408static int ip_vs_del_service(struct ip_vs_service *svc)
1409{
1410 if (svc == NULL)
1411 return -EEXIST;
1412
1413 /*
1414 * Unhash it from the service table
1415 */
1416 write_lock_bh(&__ip_vs_svc_lock);
1417
1418 ip_vs_svc_unhash(svc);
1419
1420 /*
1421 * Wait until all the svc users go away.
1422 */
1423 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1424
1425 __ip_vs_del_service(svc);
1426
1427 write_unlock_bh(&__ip_vs_svc_lock);
1428
1429 return 0;
1430}
1431
1432
1433/*
1434 * Flush all the virtual services
1435 */
1436static int ip_vs_flush(void)
1437{
1438 int idx;
1439 struct ip_vs_service *svc, *nxt;
1440
1441 /*
1442 * Flush the service table hashed by <protocol,addr,port>
1443 */
1444 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1445 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1446 write_lock_bh(&__ip_vs_svc_lock);
1447 ip_vs_svc_unhash(svc);
1448 /*
1449 * Wait until all the svc users go away.
1450 */
1451 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1452 __ip_vs_del_service(svc);
1453 write_unlock_bh(&__ip_vs_svc_lock);
1454 }
1455 }
1456
1457 /*
1458 * Flush the service table hashed by fwmark
1459 */
1460 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1461 list_for_each_entry_safe(svc, nxt,
1462 &ip_vs_svc_fwm_table[idx], f_list) {
1463 write_lock_bh(&__ip_vs_svc_lock);
1464 ip_vs_svc_unhash(svc);
1465 /*
1466 * Wait until all the svc users go away.
1467 */
1468 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1469 __ip_vs_del_service(svc);
1470 write_unlock_bh(&__ip_vs_svc_lock);
1471 }
1472 }
1473
1474 return 0;
1475}
1476
1477
1478/*
1479 * Zero counters in a service or all services
1480 */
1481static int ip_vs_zero_service(struct ip_vs_service *svc)
1482{
1483 struct ip_vs_dest *dest;
1484
1485 write_lock_bh(&__ip_vs_svc_lock);
1486 list_for_each_entry(dest, &svc->destinations, n_list) {
1487 ip_vs_zero_stats(&dest->stats);
1488 }
1489 ip_vs_zero_stats(&svc->stats);
1490 write_unlock_bh(&__ip_vs_svc_lock);
1491 return 0;
1492}
1493
1494static int ip_vs_zero_all(void)
1495{
1496 int idx;
1497 struct ip_vs_service *svc;
1498
1499 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1500 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1501 ip_vs_zero_service(svc);
1502 }
1503 }
1504
1505 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1506 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1507 ip_vs_zero_service(svc);
1508 }
1509 }
1510
1511 ip_vs_zero_stats(&ip_vs_stats);
1512 return 0;
1513}
1514
1515
1516static int
1517proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
1518 void __user *buffer, size_t *lenp, loff_t *ppos)
1519{
1520 int *valp = table->data;
1521 int val = *valp;
1522 int rc;
1523
1524 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1525 if (write && (*valp != val)) {
1526 if ((*valp < 0) || (*valp > 3)) {
1527 /* Restore the correct value */
1528 *valp = val;
1529 } else {
1da177e4 1530 update_defense_level();
1da177e4
LT
1531 }
1532 }
1533 return rc;
1534}
1535
1536
1537static int
1538proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
1539 void __user *buffer, size_t *lenp, loff_t *ppos)
1540{
1541 int *valp = table->data;
1542 int val[2];
1543 int rc;
1544
1545 /* backup the value first */
1546 memcpy(val, valp, sizeof(val));
1547
1548 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1549 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1550 /* Restore the correct value */
1551 memcpy(valp, val, sizeof(val));
1552 }
1553 return rc;
1554}
1555
1556
1557/*
1558 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1559 */
1560
/*
 * Sysctl entries for IPVS.  Every entry is mode 0644.  drop_entry,
 * drop_packet and secure_tcp are handled by proc_do_defense_mode(),
 * sync_threshold by proc_do_sync_threshold(); the other active entries use
 * plain proc_dointvec().  The table ends with a zeroed entry.
 */
1561static struct ctl_table vs_vars[] = {
1562 {
1da177e4
LT
1563 .procname = "amemthresh",
1564 .data = &sysctl_ip_vs_amemthresh,
1565 .maxlen = sizeof(int),
1566 .mode = 0644,
1567 .proc_handler = &proc_dointvec,
1568 },
/* debug_level exists only when IPVS debugging is compiled in */
1569#ifdef CONFIG_IP_VS_DEBUG
1570 {
1da177e4
LT
1571 .procname = "debug_level",
1572 .data = &sysctl_ip_vs_debug_level,
1573 .maxlen = sizeof(int),
1574 .mode = 0644,
1575 .proc_handler = &proc_dointvec,
1576 },
1577#endif
1578 {
1da177e4
LT
1579 .procname = "am_droprate",
1580 .data = &sysctl_ip_vs_am_droprate,
1581 .maxlen = sizeof(int),
1582 .mode = 0644,
1583 .proc_handler = &proc_dointvec,
1584 },
/* the next three entries share proc_do_defense_mode(), which accepts 0..3 */
1585 {
1da177e4
LT
1586 .procname = "drop_entry",
1587 .data = &sysctl_ip_vs_drop_entry,
1588 .maxlen = sizeof(int),
1589 .mode = 0644,
1590 .proc_handler = &proc_do_defense_mode,
1591 },
1592 {
1da177e4
LT
1593 .procname = "drop_packet",
1594 .data = &sysctl_ip_vs_drop_packet,
1595 .maxlen = sizeof(int),
1596 .mode = 0644,
1597 .proc_handler = &proc_do_defense_mode,
1598 },
1599 {
1da177e4
LT
1600 .procname = "secure_tcp",
1601 .data = &sysctl_ip_vs_secure_tcp,
1602 .maxlen = sizeof(int),
1603 .mode = 0644,
1604 .proc_handler = &proc_do_defense_mode,
1605 },
/* compiled out: per-state timeout entries backed by vs_timeout_table_dos */
1606#if 0
1607 {
1da177e4
LT
1608 .procname = "timeout_established",
1609 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1610 .maxlen = sizeof(int),
1611 .mode = 0644,
1612 .proc_handler = &proc_dointvec_jiffies,
1613 },
1614 {
1da177e4
LT
1615 .procname = "timeout_synsent",
1616 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1617 .maxlen = sizeof(int),
1618 .mode = 0644,
1619 .proc_handler = &proc_dointvec_jiffies,
1620 },
1621 {
1da177e4
LT
1622 .procname = "timeout_synrecv",
1623 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1624 .maxlen = sizeof(int),
1625 .mode = 0644,
1626 .proc_handler = &proc_dointvec_jiffies,
1627 },
1628 {
1da177e4
LT
1629 .procname = "timeout_finwait",
1630 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1631 .maxlen = sizeof(int),
1632 .mode = 0644,
1633 .proc_handler = &proc_dointvec_jiffies,
1634 },
1635 {
1da177e4
LT
1636 .procname = "timeout_timewait",
1637 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1638 .maxlen = sizeof(int),
1639 .mode = 0644,
1640 .proc_handler = &proc_dointvec_jiffies,
1641 },
1642 {
1da177e4
LT
1643 .procname = "timeout_close",
1644 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1645 .maxlen = sizeof(int),
1646 .mode = 0644,
1647 .proc_handler = &proc_dointvec_jiffies,
1648 },
1649 {
1da177e4
LT
1650 .procname = "timeout_closewait",
1651 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1652 .maxlen = sizeof(int),
1653 .mode = 0644,
1654 .proc_handler = &proc_dointvec_jiffies,
1655 },
1656 {
1da177e4
LT
1657 .procname = "timeout_lastack",
1658 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1659 .maxlen = sizeof(int),
1660 .mode = 0644,
1661 .proc_handler = &proc_dointvec_jiffies,
1662 },
1663 {
1da177e4
LT
1664 .procname = "timeout_listen",
1665 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1666 .maxlen = sizeof(int),
1667 .mode = 0644,
1668 .proc_handler = &proc_dointvec_jiffies,
1669 },
1670 {
1da177e4
LT
1671 .procname = "timeout_synack",
1672 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1673 .maxlen = sizeof(int),
1674 .mode = 0644,
1675 .proc_handler = &proc_dointvec_jiffies,
1676 },
1677 {
1da177e4
LT
1678 .procname = "timeout_udp",
1679 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1680 .maxlen = sizeof(int),
1681 .mode = 0644,
1682 .proc_handler = &proc_dointvec_jiffies,
1683 },
1684 {
1da177e4
LT
1685 .procname = "timeout_icmp",
1686 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1687 .maxlen = sizeof(int),
1688 .mode = 0644,
1689 .proc_handler = &proc_dointvec_jiffies,
1690 },
1691#endif
1692 {
1da177e4
LT
1693 .procname = "cache_bypass",
1694 .data = &sysctl_ip_vs_cache_bypass,
1695 .maxlen = sizeof(int),
1696 .mode = 0644,
1697 .proc_handler = &proc_dointvec,
1698 },
1699 {
1da177e4
LT
1700 .procname = "expire_nodest_conn",
1701 .data = &sysctl_ip_vs_expire_nodest_conn,
1702 .maxlen = sizeof(int),
1703 .mode = 0644,
1704 .proc_handler = &proc_dointvec,
1705 },
1706 {
1da177e4
LT
1707 .procname = "expire_quiescent_template",
1708 .data = &sysctl_ip_vs_expire_quiescent_template,
1709 .maxlen = sizeof(int),
1710 .mode = 0644,
1711 .proc_handler = &proc_dointvec,
1712 },
/* maxlen covers the whole variable; its handler treats the data as two ints */
1713 {
1da177e4
LT
1714 .procname = "sync_threshold",
1715 .data = &sysctl_ip_vs_sync_threshold,
1716 .maxlen = sizeof(sysctl_ip_vs_sync_threshold),
1717 .mode = 0644,
1718 .proc_handler = &proc_do_sync_threshold,
1719 },
1720 {
1da177e4
LT
1721 .procname = "nat_icmp_send",
1722 .data = &sysctl_ip_vs_nat_icmp_send,
1723 .maxlen = sizeof(int),
1724 .mode = 0644,
1725 .proc_handler = &proc_dointvec,
1726 },
/* end-of-table marker */
1727 { .ctl_name = 0 }
1728};
1729
/*
 * Sysctl directory path "net" / "ipv4" / "vs", terminated by an empty
 * entry.  Exported (GPL-only) so that code outside this file can use it.
 */
5587da55 1730const struct ctl_path net_vs_ctl_path[] = {
90754f8e
PE
1731 { .procname = "net", .ctl_name = CTL_NET, },
1732 { .procname = "ipv4", .ctl_name = NET_IPV4, },
1733 { .procname = "vs", },
1734 { }
1da177e4 1735};
90754f8e 1736EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1da177e4
LT
1737
/* Sysctl table header; presumably the handle returned when vs_vars is registered — TODO confirm at the registration site. */
1738static struct ctl_table_header * sysctl_header;
1739
1740#ifdef CONFIG_PROC_FS
1741
/* Cursor kept in seq->private while the service tables are dumped through seq_file. */
1742struct ip_vs_iter {
1743 struct list_head *table; /* table being walked: ip_vs_svc_table or ip_vs_svc_fwm_table */
1744 int bucket; /* index of the current hash bucket in that table */
1745};
1746
1747/*
1748 * Write the contents of the VS rule table to a PROCfs file.
1749 * (It is kept just for backward compatibility)
1750 */
1751static inline const char *ip_vs_fwd_name(unsigned flags)
1752{
1753 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1754 case IP_VS_CONN_F_LOCALNODE:
1755 return "Local";
1756 case IP_VS_CONN_F_TUNNEL:
1757 return "Tunnel";
1758 case IP_VS_CONN_F_DROUTE:
1759 return "Route";
1760 default:
1761 return "Masq";
1762 }
1763}
1764
1765
1766/* Get the Nth entry in the two lists */
1767static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1768{
1769 struct ip_vs_iter *iter = seq->private;
1770 int idx;
1771 struct ip_vs_service *svc;
1772
1773 /* look in hash by protocol */
1774 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1775 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1776 if (pos-- == 0){
1777 iter->table = ip_vs_svc_table;
1778 iter->bucket = idx;
1779 return svc;
1780 }
1781 }
1782 }
1783
1784 /* keep looking in fwmark */
1785 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1786 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1787 if (pos-- == 0) {
1788 iter->table = ip_vs_svc_fwm_table;
1789 iter->bucket = idx;
1790 return svc;
1791 }
1792 }
1793 }
1794
1795 return NULL;
1796}
1797
1798static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1799{
1800
1801 read_lock_bh(&__ip_vs_svc_lock);
1802 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1803}
1804
1805
1806static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1807{
1808 struct list_head *e;
1809 struct ip_vs_iter *iter;
1810 struct ip_vs_service *svc;
1811
1812 ++*pos;
1813 if (v == SEQ_START_TOKEN)
1814 return ip_vs_info_array(seq,0);
1815
1816 svc = v;
1817 iter = seq->private;
1818
1819 if (iter->table == ip_vs_svc_table) {
1820 /* next service in table hashed by protocol */
1821 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1822 return list_entry(e, struct ip_vs_service, s_list);
1823
1824
1825 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1826 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1827 s_list) {
1828 return svc;
1829 }
1830 }
1831
1832 iter->table = ip_vs_svc_fwm_table;
1833 iter->bucket = -1;
1834 goto scan_fwmark;
1835 }
1836
1837 /* next service in hashed by fwmark */
1838 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1839 return list_entry(e, struct ip_vs_service, f_list);
1840
1841 scan_fwmark:
1842 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1843 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1844 f_list)
1845 return svc;
1846 }
1847
1848 return NULL;
1849}
1850
/* seq_file ->stop callback: releases the read lock taken in ip_vs_info_seq_start(). */
1851static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1852{
1853 read_unlock_bh(&__ip_vs_svc_lock);
1854}
1855
1856
/*
 * seq_file ->show callback.  For SEQ_START_TOKEN it prints the version
 * banner and the two column headers; for a service it prints one line
 * describing the service followed by one line per destination.
 * Always returns 0.
 */
1857static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1858{
1859 if (v == SEQ_START_TOKEN) {
1860 seq_printf(seq,
1861 "IP Virtual Server version %d.%d.%d (size=%d)\n",
1862 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
1863 seq_puts(seq,
1864 "Prot LocalAddress:Port Scheduler Flags\n");
1865 seq_puts(seq,
1866 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1867 } else {
1868 const struct ip_vs_service *svc = v;
1869 const struct ip_vs_iter *iter = seq->private;
1870 const struct ip_vs_dest *dest;
1871
667a5f18
VB
/* services from the protocol table show address:port, fwmark services show the mark */
1872 if (iter->table == ip_vs_svc_table) {
1873#ifdef CONFIG_IP_VS_IPV6
1874 if (svc->af == AF_INET6)
1875 seq_printf(seq, "%s [" NIP6_FMT "]:%04X %s ",
1876 ip_vs_proto_name(svc->protocol),
1877 NIP6(svc->addr.in6),
1878 ntohs(svc->port),
1879 svc->scheduler->name);
1880 else
1881#endif
/* the "else" above, when compiled in, binds to this IPv4 seq_printf */
1882 seq_printf(seq, "%s %08X:%04X %s ",
1883 ip_vs_proto_name(svc->protocol),
1884 ntohl(svc->addr.ip),
1885 ntohs(svc->port),
1886 svc->scheduler->name);
1887 } else {
1da177e4
LT
1888 seq_printf(seq, "FWM %08X %s ",
1889 svc->fwmark, svc->scheduler->name);
667a5f18 1890 }
1da177e4
LT
1891
/* NOTE(review): svc->timeout is printed as stored; ip_vs_add_service() stores u->timeout * HZ */
1892 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1893 seq_printf(seq, "persistent %d %08X\n",
1894 svc->timeout,
1895 ntohl(svc->netmask));
1896 else
1897 seq_putc(seq, '\n');
1898
/* one line per destination: address:port, forwarding method, weight, connection counts */
1899 list_for_each_entry(dest, &svc->destinations, n_list) {
667a5f18
VB
1900#ifdef CONFIG_IP_VS_IPV6
1901 if (dest->af == AF_INET6)
1902 seq_printf(seq,
1903 " -> [" NIP6_FMT "]:%04X"
1904 " %-7s %-6d %-10d %-10d\n",
1905 NIP6(dest->addr.in6),
1906 ntohs(dest->port),
1907 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1908 atomic_read(&dest->weight),
1909 atomic_read(&dest->activeconns),
1910 atomic_read(&dest->inactconns));
1911 else
1912#endif
1913 seq_printf(seq,
1914 " -> %08X:%04X "
1915 "%-7s %-6d %-10d %-10d\n",
1916 ntohl(dest->addr.ip),
1917 ntohs(dest->port),
1918 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1919 atomic_read(&dest->weight),
1920 atomic_read(&dest->activeconns),
1921 atomic_read(&dest->inactconns));
1922
1da177e4
LT
1923 }
1924 }
1925 return 0;
1926}
1927
/* seq_file callbacks used to dump the service tables; installed by ip_vs_info_open(). */
56b3d975 1928static const struct seq_operations ip_vs_info_seq_ops = {
1da177e4
LT
1929 .start = ip_vs_info_seq_start,
1930 .next = ip_vs_info_seq_next,
1931 .stop = ip_vs_info_seq_stop,
1932 .show = ip_vs_info_seq_show,
1933};
1934
/*
 * open() handler: sets up a seq_file using ip_vs_info_seq_ops with
 * sizeof(struct ip_vs_iter) bytes of private data, and returns the
 * result of seq_open_private().
 */
1935static int ip_vs_info_open(struct inode *inode, struct file *file)
1936{
cf7732e4
PE
1937 return seq_open_private(file, &ip_vs_info_seq_ops,
1938 sizeof(struct ip_vs_iter));
1da177e4
LT
1939}
1940
/* File operations for the service listing; release is seq_release_private to pair with seq_open_private() in ip_vs_info_open(). */
9a32144e 1941static const struct file_operations ip_vs_info_fops = {
1da177e4
LT
1942 .owner = THIS_MODULE,
1943 .open = ip_vs_info_open,
1944 .read = seq_read,
1945 .llseek = seq_lseek,
1946 .release = seq_release_private,
1947};
1948
1949#endif
1950
519e49e8
SW
/* Global IPVS statistics; only the spinlock is initialized explicitly here. */
1951struct ip_vs_stats ip_vs_stats = {
1952 .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
1953};
1da177e4
LT
1954
1955#ifdef CONFIG_PROC_FS
/*
 * seq_file show routine for the global statistics: prints the totals and
 * the per-second rates from ip_vs_stats in hexadecimal, reading the
 * values under ip_vs_stats.lock.  Always returns 0.
 */
1956static int ip_vs_stats_show(struct seq_file *seq, void *v)
1957{
1958
1959/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1960 seq_puts(seq,
1961 " Total Incoming Outgoing Incoming Outgoing\n");
1962 seq_printf(seq,
1963 " Conns Packets Packets Bytes Bytes\n");
1964
/* hold the stats lock while the values are read */
1965 spin_lock_bh(&ip_vs_stats.lock);
1966 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.conns,
1967 ip_vs_stats.inpkts, ip_vs_stats.outpkts,
1968 (unsigned long long) ip_vs_stats.inbytes,
1969 (unsigned long long) ip_vs_stats.outbytes);
1970
1971/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1972 seq_puts(seq,
1973 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1974 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
1975 ip_vs_stats.cps,
1976 ip_vs_stats.inpps,
1977 ip_vs_stats.outpps,
1978 ip_vs_stats.inbps,
1979 ip_vs_stats.outbps);
1980 spin_unlock_bh(&ip_vs_stats.lock);
1981
1982 return 0;
1983}
1984
/* open() handler for the statistics file: single_open() with ip_vs_stats_show and no private data. */
1985static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1986{
1987 return single_open(file, ip_vs_stats_show, NULL);
1988}
1989
/* File operations for the statistics file; release is single_release to pair with single_open() in ip_vs_stats_seq_open(). */
9a32144e 1990static const struct file_operations ip_vs_stats_fops = {
1da177e4
LT
1991 .owner = THIS_MODULE,
1992 .open = ip_vs_stats_seq_open,
1993 .read = seq_read,
1994 .llseek = seq_lseek,
1995 .release = single_release,
1996};
1997
1998#endif
1999
2000/*
2001 * Set timeout values for tcp tcpfin udp in the timeout_table.
2002 */
2003static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
2004{
2005 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2006 u->tcp_timeout,
2007 u->tcp_fin_timeout,
2008 u->udp_timeout);
2009
2010#ifdef CONFIG_IP_VS_PROTO_TCP
2011 if (u->tcp_timeout) {
2012 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
2013 = u->tcp_timeout * HZ;
2014 }
2015
2016 if (u->tcp_fin_timeout) {
2017 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
2018 = u->tcp_fin_timeout * HZ;
2019 }
2020#endif
2021
2022#ifdef CONFIG_IP_VS_PROTO_UDP
2023 if (u->udp_timeout) {
2024 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
2025 = u->udp_timeout * HZ;
2026 }
2027#endif
2028 return 0;
2029}
2030
2031
/*
 * Helpers for the set-sockopt interface.
 *
 * SET_CMDID() maps a command number to its index in set_arglen[]; the
 * argument is parenthesized so that any expression can be passed safely.
 * The *_ARG_LEN macros give the argument size expected for each family of
 * commands, and MAX_ARG_LEN is the largest of them (it sizes the on-stack
 * copy buffer in do_ip_vs_set_ctl()).
 */
#define SET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
#define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
				 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
#define MAX_ARG_LEN		SVCDEST_ARG_LEN
2039
/*
 * Expected argument length for each set command, indexed by SET_CMDID().
 * do_ip_vs_set_ctl() rejects a request whose length differs from the entry.
 * NOTE(review): entries are unsigned char, so every length must fit in 255 — confirm for the structs involved.
 */
9b5b5cff 2040static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
1da177e4
LT
2041 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
2042 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
2043 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
2044 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
2045 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
2046 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
2047 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
2048 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
2049 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
2050 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
2051 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
2052};
2053
c860c6b1
JV
2054static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2055 struct ip_vs_service_user *usvc_compat)
2056{
2057 usvc->af = AF_INET;
2058 usvc->protocol = usvc_compat->protocol;
2059 usvc->addr.ip = usvc_compat->addr;
2060 usvc->port = usvc_compat->port;
2061 usvc->fwmark = usvc_compat->fwmark;
2062
2063 /* Deep copy of sched_name is not needed here */
2064 usvc->sched_name = usvc_compat->sched_name;
2065
2066 usvc->flags = usvc_compat->flags;
2067 usvc->timeout = usvc_compat->timeout;
2068 usvc->netmask = usvc_compat->netmask;
2069}
2070
2071static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2072 struct ip_vs_dest_user *udest_compat)
2073{
2074 udest->addr.ip = udest_compat->addr;
2075 udest->port = udest_compat->port;
2076 udest->conn_flags = udest_compat->conn_flags;
2077 udest->weight = udest_compat->weight;
2078 udest->u_threshold = udest_compat->u_threshold;
2079 udest->l_threshold = udest_compat->l_threshold;
2080}
2081
1da177e4
LT
2082static int
2083do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2084{
2085 int ret;
2086 unsigned char arg[MAX_ARG_LEN];
c860c6b1
JV
2087 struct ip_vs_service_user *usvc_compat;
2088 struct ip_vs_service_user_kern usvc;
1da177e4 2089 struct ip_vs_service *svc;
c860c6b1
JV
2090 struct ip_vs_dest_user *udest_compat;
2091 struct ip_vs_dest_user_kern udest;
1da177e4
LT
2092
2093 if (!capable(CAP_NET_ADMIN))
2094 return -EPERM;
2095
2096 if (len != set_arglen[SET_CMDID(cmd)]) {
2097 IP_VS_ERR("set_ctl: len %u != %u\n",
2098 len, set_arglen[SET_CMDID(cmd)]);
2099 return -EINVAL;
2100 }
2101
2102 if (copy_from_user(arg, user, len) != 0)
2103 return -EFAULT;
2104
2105 /* increase the module use count */
2106 ip_vs_use_count_inc();
2107
14cc3e2b 2108 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
1da177e4
LT
2109 ret = -ERESTARTSYS;
2110 goto out_dec;
2111 }
2112
2113 if (cmd == IP_VS_SO_SET_FLUSH) {
2114 /* Flush the virtual service */
2115 ret = ip_vs_flush();
2116 goto out_unlock;
2117 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2118 /* Set timeout values for (tcp tcpfin udp) */
2119 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
2120 goto out_unlock;
2121 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2122 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2123 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
2124 goto out_unlock;
2125 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2126 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2127 ret = stop_sync_thread(dm->state);
2128 goto out_unlock;
2129 }
2130
c860c6b1
JV
2131 usvc_compat = (struct ip_vs_service_user *)arg;
2132 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2133
2134 /* We only use the new structs internally, so copy userspace compat
2135 * structs to extended internal versions */
2136 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2137 ip_vs_copy_udest_compat(&udest, udest_compat);
1da177e4
LT
2138
2139 if (cmd == IP_VS_SO_SET_ZERO) {
2140 /* if no service address is set, zero counters in all */
c860c6b1 2141 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
1da177e4
LT
2142 ret = ip_vs_zero_all();
2143 goto out_unlock;
2144 }
2145 }
2146
2147 /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
c860c6b1 2148 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) {
1da177e4 2149 IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
c860c6b1
JV
2150 usvc.protocol, NIPQUAD(usvc.addr.ip),
2151 ntohs(usvc.port), usvc.sched_name);
1da177e4
LT
2152 ret = -EFAULT;
2153 goto out_unlock;
2154 }
2155
2156 /* Lookup the exact service by <protocol, addr, port> or fwmark */
c860c6b1 2157 if (usvc.fwmark == 0)
b18610de
JV
2158 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
2159 &usvc.addr, usvc.port);
1da177e4 2160 else
b18610de 2161 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
1da177e4
LT
2162
2163 if (cmd != IP_VS_SO_SET_ADD
c860c6b1 2164 && (svc == NULL || svc->protocol != usvc.protocol)) {
1da177e4
LT
2165 ret = -ESRCH;
2166 goto out_unlock;
2167 }
2168
2169 switch (cmd) {
2170 case IP_VS_SO_SET_ADD:
2171 if (svc != NULL)
2172 ret = -EEXIST;
2173 else
c860c6b1 2174 ret = ip_vs_add_service(&usvc, &svc);
1da177e4
LT
2175 break;
2176 case IP_VS_SO_SET_EDIT:
c860c6b1 2177 ret = ip_vs_edit_service(svc, &usvc);
1da177e4
LT
2178 break;
2179 case IP_VS_SO_SET_DEL:
2180 ret = ip_vs_del_service(svc);
2181 if (!ret)
2182 goto out_unlock;
2183 break;
2184 case IP_VS_SO_SET_ZERO:
2185 ret = ip_vs_zero_service(svc);
2186 break;
2187 case IP_VS_SO_SET_ADDDEST:
c860c6b1 2188 ret = ip_vs_add_dest(svc, &udest);
1da177e4
LT
2189 break;
2190 case IP_VS_SO_SET_EDITDEST:
c860c6b1 2191 ret = ip_vs_edit_dest(svc, &udest);
1da177e4
LT
2192 break;
2193 case IP_VS_SO_SET_DELDEST:
c860c6b1 2194 ret = ip_vs_del_dest(svc, &udest);
1da177e4
LT
2195 break;
2196 default:
2197 ret = -EINVAL;
2198 }
2199
2200 if (svc)
2201 ip_vs_service_put(svc);
2202
2203 out_unlock:
14cc3e2b 2204 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2205 out_dec:
2206 /* decrease the module use count */
2207 ip_vs_use_count_dec();
2208
2209 return ret;
2210}
2211
2212
2213static void
2214ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2215{
2216 spin_lock_bh(&src->lock);
2217 memcpy(dst, src, (char*)&src->lock - (char*)src);
2218 spin_unlock_bh(&src->lock);
2219}
2220
2221static void
2222ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2223{
2224 dst->protocol = src->protocol;
e7ade46a 2225 dst->addr = src->addr.ip;
1da177e4
LT
2226 dst->port = src->port;
2227 dst->fwmark = src->fwmark;
4da62fc7 2228 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
1da177e4
LT
2229 dst->flags = src->flags;
2230 dst->timeout = src->timeout / HZ;
2231 dst->netmask = src->netmask;
2232 dst->num_dests = src->num_dests;
2233 ip_vs_copy_stats(&dst->stats, &src->stats);
2234}
2235
2236static inline int
2237__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2238 struct ip_vs_get_services __user *uptr)
2239{
2240 int idx, count=0;
2241 struct ip_vs_service *svc;
2242 struct ip_vs_service_entry entry;
2243 int ret = 0;
2244
2245 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2246 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
f94fd041
JV
2247 /* Only expose IPv4 entries to old interface */
2248 if (svc->af != AF_INET)
2249 continue;
2250
1da177e4
LT
2251 if (count >= get->num_services)
2252 goto out;
4da62fc7 2253 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2254 ip_vs_copy_service(&entry, svc);
2255 if (copy_to_user(&uptr->entrytable[count],
2256 &entry, sizeof(entry))) {
2257 ret = -EFAULT;
2258 goto out;
2259 }
2260 count++;
2261 }
2262 }
2263
2264 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2265 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
f94fd041
JV
2266 /* Only expose IPv4 entries to old interface */
2267 if (svc->af != AF_INET)
2268 continue;
2269
1da177e4
LT
2270 if (count >= get->num_services)
2271 goto out;
4da62fc7 2272 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2273 ip_vs_copy_service(&entry, svc);
2274 if (copy_to_user(&uptr->entrytable[count],
2275 &entry, sizeof(entry))) {
2276 ret = -EFAULT;
2277 goto out;
2278 }
2279 count++;
2280 }
2281 }
2282 out:
2283 return ret;
2284}
2285
2286static inline int
2287__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2288 struct ip_vs_get_dests __user *uptr)
2289{
2290 struct ip_vs_service *svc;
b18610de 2291 union nf_inet_addr addr = { .ip = get->addr };
1da177e4
LT
2292 int ret = 0;
2293
2294 if (get->fwmark)
b18610de 2295 svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark);
1da177e4 2296 else
b18610de
JV
2297 svc = __ip_vs_service_get(AF_INET, get->protocol, &addr,
2298 get->port);
2299
1da177e4
LT
2300 if (svc) {
2301 int count = 0;
2302 struct ip_vs_dest *dest;
2303 struct ip_vs_dest_entry entry;
2304
2305 list_for_each_entry(dest, &svc->destinations, n_list) {
2306 if (count >= get->num_dests)
2307 break;
2308
e7ade46a 2309 entry.addr = dest->addr.ip;
1da177e4
LT
2310 entry.port = dest->port;
2311 entry.conn_flags = atomic_read(&dest->conn_flags);
2312 entry.weight = atomic_read(&dest->weight);
2313 entry.u_threshold = dest->u_threshold;
2314 entry.l_threshold = dest->l_threshold;
2315 entry.activeconns = atomic_read(&dest->activeconns);
2316 entry.inactconns = atomic_read(&dest->inactconns);
2317 entry.persistconns = atomic_read(&dest->persistconns);
2318 ip_vs_copy_stats(&entry.stats, &dest->stats);
2319 if (copy_to_user(&uptr->entrytable[count],
2320 &entry, sizeof(entry))) {
2321 ret = -EFAULT;
2322 break;
2323 }
2324 count++;
2325 }
2326 ip_vs_service_put(svc);
2327 } else
2328 ret = -ESRCH;
2329 return ret;
2330}
2331
2332static inline void
2333__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2334{
2335#ifdef CONFIG_IP_VS_PROTO_TCP
2336 u->tcp_timeout =
2337 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2338 u->tcp_fin_timeout =
2339 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2340#endif
2341#ifdef CONFIG_IP_VS_PROTO_UDP
2342 u->udp_timeout =
2343 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2344#endif
2345}
2346
2347
2348#define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2349#define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2350#define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2351#define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2352#define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2353#define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2354#define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2355
9b5b5cff 2356static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
1da177e4
LT
2357 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2358 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2359 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2360 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2361 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2362 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2363 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2364};
2365
2366static int
2367do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2368{
2369 unsigned char arg[128];
2370 int ret = 0;
2371
2372 if (!capable(CAP_NET_ADMIN))
2373 return -EPERM;
2374
2375 if (*len < get_arglen[GET_CMDID(cmd)]) {
2376 IP_VS_ERR("get_ctl: len %u < %u\n",
2377 *len, get_arglen[GET_CMDID(cmd)]);
2378 return -EINVAL;
2379 }
2380
2381 if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0)
2382 return -EFAULT;
2383
14cc3e2b 2384 if (mutex_lock_interruptible(&__ip_vs_mutex))
1da177e4
LT
2385 return -ERESTARTSYS;
2386
2387 switch (cmd) {
2388 case IP_VS_SO_GET_VERSION:
2389 {
2390 char buf[64];
2391
2392 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2393 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
2394 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2395 ret = -EFAULT;
2396 goto out;
2397 }
2398 *len = strlen(buf)+1;
2399 }
2400 break;
2401
2402 case IP_VS_SO_GET_INFO:
2403 {
2404 struct ip_vs_getinfo info;
2405 info.version = IP_VS_VERSION_CODE;
2406 info.size = IP_VS_CONN_TAB_SIZE;
2407 info.num_services = ip_vs_num_services;
2408 if (copy_to_user(user, &info, sizeof(info)) != 0)
2409 ret = -EFAULT;
2410 }
2411 break;
2412
2413 case IP_VS_SO_GET_SERVICES:
2414 {
2415 struct ip_vs_get_services *get;
2416 int size;
2417
2418 get = (struct ip_vs_get_services *)arg;
2419 size = sizeof(*get) +
2420 sizeof(struct ip_vs_service_entry) * get->num_services;
2421 if (*len != size) {
2422 IP_VS_ERR("length: %u != %u\n", *len, size);
2423 ret = -EINVAL;
2424 goto out;
2425 }
2426 ret = __ip_vs_get_service_entries(get, user);
2427 }
2428 break;
2429
2430 case IP_VS_SO_GET_SERVICE:
2431 {
2432 struct ip_vs_service_entry *entry;
2433 struct ip_vs_service *svc;
b18610de 2434 union nf_inet_addr addr;
1da177e4
LT
2435
2436 entry = (struct ip_vs_service_entry *)arg;
b18610de 2437 addr.ip = entry->addr;
1da177e4 2438 if (entry->fwmark)
b18610de 2439 svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark);
1da177e4 2440 else
b18610de
JV
2441 svc = __ip_vs_service_get(AF_INET, entry->protocol,
2442 &addr, entry->port);
1da177e4
LT
2443 if (svc) {
2444 ip_vs_copy_service(entry, svc);
2445 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2446 ret = -EFAULT;
2447 ip_vs_service_put(svc);
2448 } else
2449 ret = -ESRCH;
2450 }
2451 break;
2452
2453 case IP_VS_SO_GET_DESTS:
2454 {
2455 struct ip_vs_get_dests *get;
2456 int size;
2457
2458 get = (struct ip_vs_get_dests *)arg;
2459 size = sizeof(*get) +
2460 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2461 if (*len != size) {
2462 IP_VS_ERR("length: %u != %u\n", *len, size);
2463 ret = -EINVAL;
2464 goto out;
2465 }
2466 ret = __ip_vs_get_dest_entries(get, user);
2467 }
2468 break;
2469
2470 case IP_VS_SO_GET_TIMEOUT:
2471 {
2472 struct ip_vs_timeout_user t;
2473
2474 __ip_vs_get_timeouts(&t);
2475 if (copy_to_user(user, &t, sizeof(t)) != 0)
2476 ret = -EFAULT;
2477 }
2478 break;
2479
2480 case IP_VS_SO_GET_DAEMON:
2481 {
2482 struct ip_vs_daemon_user d[2];
2483
2484 memset(&d, 0, sizeof(d));
2485 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2486 d[0].state = IP_VS_STATE_MASTER;
4da62fc7 2487 strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
1da177e4
LT
2488 d[0].syncid = ip_vs_master_syncid;
2489 }
2490 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2491 d[1].state = IP_VS_STATE_BACKUP;
4da62fc7 2492 strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
1da177e4
LT
2493 d[1].syncid = ip_vs_backup_syncid;
2494 }
2495 if (copy_to_user(user, &d, sizeof(d)) != 0)
2496 ret = -EFAULT;
2497 }
2498 break;
2499
2500 default:
2501 ret = -EINVAL;
2502 }
2503
2504 out:
14cc3e2b 2505 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2506 return ret;
2507}
2508
2509
2510static struct nf_sockopt_ops ip_vs_sockopts = {
2511 .pf = PF_INET,
2512 .set_optmin = IP_VS_BASE_CTL,
2513 .set_optmax = IP_VS_SO_SET_MAX+1,
2514 .set = do_ip_vs_set_ctl,
2515 .get_optmin = IP_VS_BASE_CTL,
2516 .get_optmax = IP_VS_SO_GET_MAX+1,
2517 .get = do_ip_vs_get_ctl,
16fcec35 2518 .owner = THIS_MODULE,
1da177e4
LT
2519};
2520
9a812198
JV
2521/*
2522 * Generic Netlink interface
2523 */
2524
2525/* IPVS genetlink family */
2526static struct genl_family ip_vs_genl_family = {
2527 .id = GENL_ID_GENERATE,
2528 .hdrsize = 0,
2529 .name = IPVS_GENL_NAME,
2530 .version = IPVS_GENL_VERSION,
2531 .maxattr = IPVS_CMD_MAX,
2532};
2533
2534/* Policy used for first-level command attributes */
2535static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2536 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2537 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2538 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2539 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2540 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2541 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2542};
2543
2544/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2545static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2546 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2547 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2548 .len = IP_VS_IFNAME_MAXLEN },
2549 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2550};
2551
2552/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2553static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2554 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2555 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2556 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2557 .len = sizeof(union nf_inet_addr) },
2558 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2559 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2560 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2561 .len = IP_VS_SCHEDNAME_MAXLEN },
2562 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2563 .len = sizeof(struct ip_vs_flags) },
2564 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2565 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2566 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2567};
2568
2569/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2570static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2571 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2572 .len = sizeof(union nf_inet_addr) },
2573 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2574 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2575 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2576 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2577 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2578 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2579 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2580 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2581 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
2582};
2583
2584static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2585 struct ip_vs_stats *stats)
2586{
2587 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2588 if (!nl_stats)
2589 return -EMSGSIZE;
2590
2591 spin_lock_bh(&stats->lock);
2592
2593 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->conns);
2594 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->inpkts);
2595 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->outpkts);
2596 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->inbytes);
2597 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->outbytes);
2598 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->cps);
2599 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->inpps);
2600 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->outpps);
2601 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->inbps);
2602 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->outbps);
2603
2604 spin_unlock_bh(&stats->lock);
2605
2606 nla_nest_end(skb, nl_stats);
2607
2608 return 0;
2609
2610nla_put_failure:
2611 spin_unlock_bh(&stats->lock);
2612 nla_nest_cancel(skb, nl_stats);
2613 return -EMSGSIZE;
2614}
2615
2616static int ip_vs_genl_fill_service(struct sk_buff *skb,
2617 struct ip_vs_service *svc)
2618{
2619 struct nlattr *nl_service;
2620 struct ip_vs_flags flags = { .flags = svc->flags,
2621 .mask = ~0 };
2622
2623 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2624 if (!nl_service)
2625 return -EMSGSIZE;
2626
f94fd041 2627 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
9a812198
JV
2628
2629 if (svc->fwmark) {
2630 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2631 } else {
2632 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2633 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2634 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2635 }
2636
2637 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2638 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2639 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2640 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2641
2642 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2643 goto nla_put_failure;
2644
2645 nla_nest_end(skb, nl_service);
2646
2647 return 0;
2648
2649nla_put_failure:
2650 nla_nest_cancel(skb, nl_service);
2651 return -EMSGSIZE;
2652}
2653
2654static int ip_vs_genl_dump_service(struct sk_buff *skb,
2655 struct ip_vs_service *svc,
2656 struct netlink_callback *cb)
2657{
2658 void *hdr;
2659
2660 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2661 &ip_vs_genl_family, NLM_F_MULTI,
2662 IPVS_CMD_NEW_SERVICE);
2663 if (!hdr)
2664 return -EMSGSIZE;
2665
2666 if (ip_vs_genl_fill_service(skb, svc) < 0)
2667 goto nla_put_failure;
2668
2669 return genlmsg_end(skb, hdr);
2670
2671nla_put_failure:
2672 genlmsg_cancel(skb, hdr);
2673 return -EMSGSIZE;
2674}
2675
2676static int ip_vs_genl_dump_services(struct sk_buff *skb,
2677 struct netlink_callback *cb)
2678{
2679 int idx = 0, i;
2680 int start = cb->args[0];
2681 struct ip_vs_service *svc;
2682
2683 mutex_lock(&__ip_vs_mutex);
2684 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2685 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2686 if (++idx <= start)
2687 continue;
2688 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2689 idx--;
2690 goto nla_put_failure;
2691 }
2692 }
2693 }
2694
2695 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2696 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2697 if (++idx <= start)
2698 continue;
2699 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2700 idx--;
2701 goto nla_put_failure;
2702 }
2703 }
2704 }
2705
2706nla_put_failure:
2707 mutex_unlock(&__ip_vs_mutex);
2708 cb->args[0] = idx;
2709
2710 return skb->len;
2711}
2712
c860c6b1 2713static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
9a812198
JV
2714 struct nlattr *nla, int full_entry)
2715{
2716 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2717 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2718
2719 /* Parse mandatory identifying service fields first */
2720 if (nla == NULL ||
2721 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2722 return -EINVAL;
2723
2724 nla_af = attrs[IPVS_SVC_ATTR_AF];
2725 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2726 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2727 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2728 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2729
2730 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2731 return -EINVAL;
2732
c860c6b1 2733 usvc->af = nla_get_u16(nla_af);
f94fd041
JV
2734#ifdef CONFIG_IP_VS_IPV6
2735 if (usvc->af != AF_INET && usvc->af != AF_INET6)
2736#else
2737 if (usvc->af != AF_INET)
2738#endif
9a812198
JV
2739 return -EAFNOSUPPORT;
2740
2741 if (nla_fwmark) {
2742 usvc->protocol = IPPROTO_TCP;
2743 usvc->fwmark = nla_get_u32(nla_fwmark);
2744 } else {
2745 usvc->protocol = nla_get_u16(nla_protocol);
2746 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2747 usvc->port = nla_get_u16(nla_port);
2748 usvc->fwmark = 0;
2749 }
2750
2751 /* If a full entry was requested, check for the additional fields */
2752 if (full_entry) {
2753 struct nlattr *nla_sched, *nla_flags, *nla_timeout,
2754 *nla_netmask;
2755 struct ip_vs_flags flags;
2756 struct ip_vs_service *svc;
2757
2758 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2759 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2760 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2761 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2762
2763 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2764 return -EINVAL;
2765
2766 nla_memcpy(&flags, nla_flags, sizeof(flags));
2767
2768 /* prefill flags from service if it already exists */
2769 if (usvc->fwmark)
b18610de 2770 svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark);
9a812198 2771 else
b18610de
JV
2772 svc = __ip_vs_service_get(usvc->af, usvc->protocol,
2773 &usvc->addr, usvc->port);
9a812198
JV
2774 if (svc) {
2775 usvc->flags = svc->flags;
2776 ip_vs_service_put(svc);
2777 } else
2778 usvc->flags = 0;
2779
2780 /* set new flags from userland */
2781 usvc->flags = (usvc->flags & ~flags.mask) |
2782 (flags.flags & flags.mask);
c860c6b1 2783 usvc->sched_name = nla_data(nla_sched);
9a812198
JV
2784 usvc->timeout = nla_get_u32(nla_timeout);
2785 usvc->netmask = nla_get_u32(nla_netmask);
2786 }
2787
2788 return 0;
2789}
2790
2791static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2792{
c860c6b1 2793 struct ip_vs_service_user_kern usvc;
9a812198
JV
2794 int ret;
2795
2796 ret = ip_vs_genl_parse_service(&usvc, nla, 0);
2797 if (ret)
2798 return ERR_PTR(ret);
2799
2800 if (usvc.fwmark)
b18610de 2801 return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
9a812198 2802 else
b18610de
JV
2803 return __ip_vs_service_get(usvc.af, usvc.protocol,
2804 &usvc.addr, usvc.port);
9a812198
JV
2805}
2806
2807static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2808{
2809 struct nlattr *nl_dest;
2810
2811 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2812 if (!nl_dest)
2813 return -EMSGSIZE;
2814
2815 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2816 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2817
2818 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2819 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2820 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2821 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2822 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2823 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2824 atomic_read(&dest->activeconns));
2825 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2826 atomic_read(&dest->inactconns));
2827 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2828 atomic_read(&dest->persistconns));
2829
2830 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2831 goto nla_put_failure;
2832
2833 nla_nest_end(skb, nl_dest);
2834
2835 return 0;
2836
2837nla_put_failure:
2838 nla_nest_cancel(skb, nl_dest);
2839 return -EMSGSIZE;
2840}
2841
2842static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2843 struct netlink_callback *cb)
2844{
2845 void *hdr;
2846
2847 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2848 &ip_vs_genl_family, NLM_F_MULTI,
2849 IPVS_CMD_NEW_DEST);
2850 if (!hdr)
2851 return -EMSGSIZE;
2852
2853 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2854 goto nla_put_failure;
2855
2856 return genlmsg_end(skb, hdr);
2857
2858nla_put_failure:
2859 genlmsg_cancel(skb, hdr);
2860 return -EMSGSIZE;
2861}
2862
2863static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2864 struct netlink_callback *cb)
2865{
2866 int idx = 0;
2867 int start = cb->args[0];
2868 struct ip_vs_service *svc;
2869 struct ip_vs_dest *dest;
2870 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2871
2872 mutex_lock(&__ip_vs_mutex);
2873
2874 /* Try to find the service for which to dump destinations */
2875 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2876 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2877 goto out_err;
2878
2879 svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
2880 if (IS_ERR(svc) || svc == NULL)
2881 goto out_err;
2882
2883 /* Dump the destinations */
2884 list_for_each_entry(dest, &svc->destinations, n_list) {
2885 if (++idx <= start)
2886 continue;
2887 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2888 idx--;
2889 goto nla_put_failure;
2890 }
2891 }
2892
2893nla_put_failure:
2894 cb->args[0] = idx;
2895 ip_vs_service_put(svc);
2896
2897out_err:
2898 mutex_unlock(&__ip_vs_mutex);
2899
2900 return skb->len;
2901}
2902
c860c6b1 2903static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
9a812198
JV
2904 struct nlattr *nla, int full_entry)
2905{
2906 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2907 struct nlattr *nla_addr, *nla_port;
2908
2909 /* Parse mandatory identifying destination fields first */
2910 if (nla == NULL ||
2911 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2912 return -EINVAL;
2913
2914 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
2915 nla_port = attrs[IPVS_DEST_ATTR_PORT];
2916
2917 if (!(nla_addr && nla_port))
2918 return -EINVAL;
2919
2920 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2921 udest->port = nla_get_u16(nla_port);
2922
2923 /* If a full entry was requested, check for the additional fields */
2924 if (full_entry) {
2925 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2926 *nla_l_thresh;
2927
2928 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
2929 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
2930 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
2931 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
2932
2933 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
2934 return -EINVAL;
2935
2936 udest->conn_flags = nla_get_u32(nla_fwd)
2937 & IP_VS_CONN_F_FWD_MASK;
2938 udest->weight = nla_get_u32(nla_weight);
2939 udest->u_threshold = nla_get_u32(nla_u_thresh);
2940 udest->l_threshold = nla_get_u32(nla_l_thresh);
2941 }
2942
2943 return 0;
2944}
2945
2946static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
2947 const char *mcast_ifn, __be32 syncid)
2948{
2949 struct nlattr *nl_daemon;
2950
2951 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
2952 if (!nl_daemon)
2953 return -EMSGSIZE;
2954
2955 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
2956 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
2957 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
2958
2959 nla_nest_end(skb, nl_daemon);
2960
2961 return 0;
2962
2963nla_put_failure:
2964 nla_nest_cancel(skb, nl_daemon);
2965 return -EMSGSIZE;
2966}
2967
2968static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
2969 const char *mcast_ifn, __be32 syncid,
2970 struct netlink_callback *cb)
2971{
2972 void *hdr;
2973 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2974 &ip_vs_genl_family, NLM_F_MULTI,
2975 IPVS_CMD_NEW_DAEMON);
2976 if (!hdr)
2977 return -EMSGSIZE;
2978
2979 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
2980 goto nla_put_failure;
2981
2982 return genlmsg_end(skb, hdr);
2983
2984nla_put_failure:
2985 genlmsg_cancel(skb, hdr);
2986 return -EMSGSIZE;
2987}
2988
2989static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
2990 struct netlink_callback *cb)
2991{
2992 mutex_lock(&__ip_vs_mutex);
2993 if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
2994 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
2995 ip_vs_master_mcast_ifn,
2996 ip_vs_master_syncid, cb) < 0)
2997 goto nla_put_failure;
2998
2999 cb->args[0] = 1;
3000 }
3001
3002 if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3003 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3004 ip_vs_backup_mcast_ifn,
3005 ip_vs_backup_syncid, cb) < 0)
3006 goto nla_put_failure;
3007
3008 cb->args[1] = 1;
3009 }
3010
3011nla_put_failure:
3012 mutex_unlock(&__ip_vs_mutex);
3013
3014 return skb->len;
3015}
3016
3017static int ip_vs_genl_new_daemon(struct nlattr **attrs)
3018{
3019 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3020 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3021 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3022 return -EINVAL;
3023
3024 return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3025 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3026 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3027}
3028
3029static int ip_vs_genl_del_daemon(struct nlattr **attrs)
3030{
3031 if (!attrs[IPVS_DAEMON_ATTR_STATE])
3032 return -EINVAL;
3033
3034 return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3035}
3036
3037static int ip_vs_genl_set_config(struct nlattr **attrs)
3038{
3039 struct ip_vs_timeout_user t;
3040
3041 __ip_vs_get_timeouts(&t);
3042
3043 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3044 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3045
3046 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3047 t.tcp_fin_timeout =
3048 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3049
3050 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3051 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3052
3053 return ip_vs_set_timeout(&t);
3054}
3055
3056static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3057{
3058 struct ip_vs_service *svc = NULL;
c860c6b1
JV
3059 struct ip_vs_service_user_kern usvc;
3060 struct ip_vs_dest_user_kern udest;
9a812198
JV
3061 int ret = 0, cmd;
3062 int need_full_svc = 0, need_full_dest = 0;
3063
3064 cmd = info->genlhdr->cmd;
3065
3066 mutex_lock(&__ip_vs_mutex);
3067
3068 if (cmd == IPVS_CMD_FLUSH) {
3069 ret = ip_vs_flush();
3070 goto out;
3071 } else if (cmd == IPVS_CMD_SET_CONFIG) {
3072 ret = ip_vs_genl_set_config(info->attrs);
3073 goto out;
3074 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3075 cmd == IPVS_CMD_DEL_DAEMON) {
3076
3077 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3078
3079 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3080 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3081 info->attrs[IPVS_CMD_ATTR_DAEMON],
3082 ip_vs_daemon_policy)) {
3083 ret = -EINVAL;
3084 goto out;
3085 }
3086
3087 if (cmd == IPVS_CMD_NEW_DAEMON)
3088 ret = ip_vs_genl_new_daemon(daemon_attrs);
3089 else
3090 ret = ip_vs_genl_del_daemon(daemon_attrs);
3091 goto out;
3092 } else if (cmd == IPVS_CMD_ZERO &&
3093 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3094 ret = ip_vs_zero_all();
3095 goto out;
3096 }
3097
3098 /* All following commands require a service argument, so check if we
3099 * received a valid one. We need a full service specification when
3100 * adding / editing a service. Only identifying members otherwise. */
3101 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3102 need_full_svc = 1;
3103
3104 ret = ip_vs_genl_parse_service(&usvc,
3105 info->attrs[IPVS_CMD_ATTR_SERVICE],
3106 need_full_svc);
3107 if (ret)
3108 goto out;
3109
3110 /* Lookup the exact service by <protocol, addr, port> or fwmark */
3111 if (usvc.fwmark == 0)
b18610de
JV
3112 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
3113 &usvc.addr, usvc.port);
9a812198 3114 else
b18610de 3115 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
9a812198
JV
3116
3117 /* Unless we're adding a new service, the service must already exist */
3118 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3119 ret = -ESRCH;
3120 goto out;
3121 }
3122
3123 /* Destination commands require a valid destination argument. For
3124 * adding / editing a destination, we need a full destination
3125 * specification. */
3126 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3127 cmd == IPVS_CMD_DEL_DEST) {
3128 if (cmd != IPVS_CMD_DEL_DEST)
3129 need_full_dest = 1;
3130
3131 ret = ip_vs_genl_parse_dest(&udest,
3132 info->attrs[IPVS_CMD_ATTR_DEST],
3133 need_full_dest);
3134 if (ret)
3135 goto out;
3136 }
3137
3138 switch (cmd) {
3139 case IPVS_CMD_NEW_SERVICE:
3140 if (svc == NULL)
3141 ret = ip_vs_add_service(&usvc, &svc);
3142 else
3143 ret = -EEXIST;
3144 break;
3145 case IPVS_CMD_SET_SERVICE:
3146 ret = ip_vs_edit_service(svc, &usvc);
3147 break;
3148 case IPVS_CMD_DEL_SERVICE:
3149 ret = ip_vs_del_service(svc);
3150 break;
3151 case IPVS_CMD_NEW_DEST:
3152 ret = ip_vs_add_dest(svc, &udest);
3153 break;
3154 case IPVS_CMD_SET_DEST:
3155 ret = ip_vs_edit_dest(svc, &udest);
3156 break;
3157 case IPVS_CMD_DEL_DEST:
3158 ret = ip_vs_del_dest(svc, &udest);
3159 break;
3160 case IPVS_CMD_ZERO:
3161 ret = ip_vs_zero_service(svc);
3162 break;
3163 default:
3164 ret = -EINVAL;
3165 }
3166
3167out:
3168 if (svc)
3169 ip_vs_service_put(svc);
3170 mutex_unlock(&__ip_vs_mutex);
3171
3172 return ret;
3173}
3174
3175static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3176{
3177 struct sk_buff *msg;
3178 void *reply;
3179 int ret, cmd, reply_cmd;
3180
3181 cmd = info->genlhdr->cmd;
3182
3183 if (cmd == IPVS_CMD_GET_SERVICE)
3184 reply_cmd = IPVS_CMD_NEW_SERVICE;
3185 else if (cmd == IPVS_CMD_GET_INFO)
3186 reply_cmd = IPVS_CMD_SET_INFO;
3187 else if (cmd == IPVS_CMD_GET_CONFIG)
3188 reply_cmd = IPVS_CMD_SET_CONFIG;
3189 else {
3190 IP_VS_ERR("unknown Generic Netlink command\n");
3191 return -EINVAL;
3192 }
3193
3194 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3195 if (!msg)
3196 return -ENOMEM;
3197
3198 mutex_lock(&__ip_vs_mutex);
3199
3200 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3201 if (reply == NULL)
3202 goto nla_put_failure;
3203
3204 switch (cmd) {
3205 case IPVS_CMD_GET_SERVICE:
3206 {
3207 struct ip_vs_service *svc;
3208
3209 svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
3210 if (IS_ERR(svc)) {
3211 ret = PTR_ERR(svc);
3212 goto out_err;
3213 } else if (svc) {
3214 ret = ip_vs_genl_fill_service(msg, svc);
3215 ip_vs_service_put(svc);
3216 if (ret)
3217 goto nla_put_failure;
3218 } else {
3219 ret = -ESRCH;
3220 goto out_err;
3221 }
3222
3223 break;
3224 }
3225
3226 case IPVS_CMD_GET_CONFIG:
3227 {
3228 struct ip_vs_timeout_user t;
3229
3230 __ip_vs_get_timeouts(&t);
3231#ifdef CONFIG_IP_VS_PROTO_TCP
3232 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3233 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3234 t.tcp_fin_timeout);
3235#endif
3236#ifdef CONFIG_IP_VS_PROTO_UDP
3237 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3238#endif
3239
3240 break;
3241 }
3242
3243 case IPVS_CMD_GET_INFO:
3244 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3245 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3246 IP_VS_CONN_TAB_SIZE);
3247 break;
3248 }
3249
3250 genlmsg_end(msg, reply);
3251 ret = genlmsg_unicast(msg, info->snd_pid);
3252 goto out;
3253
3254nla_put_failure:
3255 IP_VS_ERR("not enough space in Netlink message\n");
3256 ret = -EMSGSIZE;
3257
3258out_err:
3259 nlmsg_free(msg);
3260out:
3261 mutex_unlock(&__ip_vs_mutex);
3262
3263 return ret;
3264}
3265
3266
3267static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3268 {
3269 .cmd = IPVS_CMD_NEW_SERVICE,
3270 .flags = GENL_ADMIN_PERM,
3271 .policy = ip_vs_cmd_policy,
3272 .doit = ip_vs_genl_set_cmd,
3273 },
3274 {
3275 .cmd = IPVS_CMD_SET_SERVICE,
3276 .flags = GENL_ADMIN_PERM,
3277 .policy = ip_vs_cmd_policy,
3278 .doit = ip_vs_genl_set_cmd,
3279 },
3280 {
3281 .cmd = IPVS_CMD_DEL_SERVICE,
3282 .flags = GENL_ADMIN_PERM,
3283 .policy = ip_vs_cmd_policy,
3284 .doit = ip_vs_genl_set_cmd,
3285 },
3286 {
3287 .cmd = IPVS_CMD_GET_SERVICE,
3288 .flags = GENL_ADMIN_PERM,
3289 .doit = ip_vs_genl_get_cmd,
3290 .dumpit = ip_vs_genl_dump_services,
3291 .policy = ip_vs_cmd_policy,
3292 },
3293 {
3294 .cmd = IPVS_CMD_NEW_DEST,
3295 .flags = GENL_ADMIN_PERM,
3296 .policy = ip_vs_cmd_policy,
3297 .doit = ip_vs_genl_set_cmd,
3298 },
3299 {
3300 .cmd = IPVS_CMD_SET_DEST,
3301 .flags = GENL_ADMIN_PERM,
3302 .policy = ip_vs_cmd_policy,
3303 .doit = ip_vs_genl_set_cmd,
3304 },
3305 {
3306 .cmd = IPVS_CMD_DEL_DEST,
3307 .flags = GENL_ADMIN_PERM,
3308 .policy = ip_vs_cmd_policy,
3309 .doit = ip_vs_genl_set_cmd,
3310 },
3311 {
3312 .cmd = IPVS_CMD_GET_DEST,
3313 .flags = GENL_ADMIN_PERM,
3314 .policy = ip_vs_cmd_policy,
3315 .dumpit = ip_vs_genl_dump_dests,
3316 },
3317 {
3318 .cmd = IPVS_CMD_NEW_DAEMON,
3319 .flags = GENL_ADMIN_PERM,
3320 .policy = ip_vs_cmd_policy,
3321 .doit = ip_vs_genl_set_cmd,
3322 },
3323 {
3324 .cmd = IPVS_CMD_DEL_DAEMON,
3325 .flags = GENL_ADMIN_PERM,
3326 .policy = ip_vs_cmd_policy,
3327 .doit = ip_vs_genl_set_cmd,
3328 },
3329 {
3330 .cmd = IPVS_CMD_GET_DAEMON,
3331 .flags = GENL_ADMIN_PERM,
3332 .dumpit = ip_vs_genl_dump_daemons,
3333 },
3334 {
3335 .cmd = IPVS_CMD_SET_CONFIG,
3336 .flags = GENL_ADMIN_PERM,
3337 .policy = ip_vs_cmd_policy,
3338 .doit = ip_vs_genl_set_cmd,
3339 },
3340 {
3341 .cmd = IPVS_CMD_GET_CONFIG,
3342 .flags = GENL_ADMIN_PERM,
3343 .doit = ip_vs_genl_get_cmd,
3344 },
3345 {
3346 .cmd = IPVS_CMD_GET_INFO,
3347 .flags = GENL_ADMIN_PERM,
3348 .doit = ip_vs_genl_get_cmd,
3349 },
3350 {
3351 .cmd = IPVS_CMD_ZERO,
3352 .flags = GENL_ADMIN_PERM,
3353 .policy = ip_vs_cmd_policy,
3354 .doit = ip_vs_genl_set_cmd,
3355 },
3356 {
3357 .cmd = IPVS_CMD_FLUSH,
3358 .flags = GENL_ADMIN_PERM,
3359 .doit = ip_vs_genl_set_cmd,
3360 },
3361};
3362
3363static int __init ip_vs_genl_register(void)
3364{
3365 int ret, i;
3366
3367 ret = genl_register_family(&ip_vs_genl_family);
3368 if (ret)
3369 return ret;
3370
3371 for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
3372 ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
3373 if (ret)
3374 goto err_out;
3375 }
3376 return 0;
3377
3378err_out:
3379 genl_unregister_family(&ip_vs_genl_family);
3380 return ret;
3381}
3382
3383static void ip_vs_genl_unregister(void)
3384{
3385 genl_unregister_family(&ip_vs_genl_family);
3386}
3387
3388/* End of Generic Netlink interface definitions */
3389
1da177e4 3390
048cf48b 3391int __init ip_vs_control_init(void)
1da177e4
LT
3392{
3393 int ret;
3394 int idx;
3395
3396 EnterFunction(2);
3397
3398 ret = nf_register_sockopt(&ip_vs_sockopts);
3399 if (ret) {
3400 IP_VS_ERR("cannot register sockopt.\n");
3401 return ret;
3402 }
3403
9a812198
JV
3404 ret = ip_vs_genl_register();
3405 if (ret) {
3406 IP_VS_ERR("cannot register Generic Netlink interface.\n");
3407 nf_unregister_sockopt(&ip_vs_sockopts);
3408 return ret;
3409 }
3410
457c4cbc
EB
3411 proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
3412 proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
1da177e4 3413
90754f8e 3414 sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
1da177e4
LT
3415
3416 /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3417 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3418 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3419 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3420 }
3421 for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) {
3422 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3423 }
3424
1da177e4
LT
3425 ip_vs_new_estimator(&ip_vs_stats);
3426
3427 /* Hook the defense timer */
3428 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3429
3430 LeaveFunction(2);
3431 return 0;
3432}
3433
3434
3435void ip_vs_control_cleanup(void)
3436{
3437 EnterFunction(2);
3438 ip_vs_trash_cleanup();
3439 cancel_rearming_delayed_work(&defense_work);
28e53bdd 3440 cancel_work_sync(&defense_work.work);
1da177e4
LT
3441 ip_vs_kill_estimator(&ip_vs_stats);
3442 unregister_sysctl_table(sysctl_header);
457c4cbc
EB
3443 proc_net_remove(&init_net, "ip_vs_stats");
3444 proc_net_remove(&init_net, "ip_vs");
9a812198 3445 ip_vs_genl_unregister();
1da177e4
LT
3446 nf_unregister_sockopt(&ip_vs_sockopts);
3447 LeaveFunction(2);
3448}