]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/ipvs/ip_vs_ctl.c
ipvs: supply a valid 0 address to ip_vs_conn_new()
[net-next-2.6.git] / net / ipv4 / ipvs / ip_vs_ctl.c
CommitLineData
1da177e4
LT
1/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
1da177e4
LT
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
21#include <linux/module.h>
22#include <linux/init.h>
23#include <linux/types.h>
4fc268d2 24#include <linux/capability.h>
1da177e4
LT
25#include <linux/fs.h>
26#include <linux/sysctl.h>
27#include <linux/proc_fs.h>
28#include <linux/workqueue.h>
29#include <linux/swap.h>
1da177e4
LT
30#include <linux/seq_file.h>
31
32#include <linux/netfilter.h>
33#include <linux/netfilter_ipv4.h>
14cc3e2b 34#include <linux/mutex.h>
1da177e4 35
457c4cbc 36#include <net/net_namespace.h>
1da177e4 37#include <net/ip.h>
09571c7a
VB
38#ifdef CONFIG_IP_VS_IPV6
39#include <net/ipv6.h>
40#include <net/ip6_route.h>
41#endif
14c85021 42#include <net/route.h>
1da177e4 43#include <net/sock.h>
9a812198 44#include <net/genetlink.h>
1da177e4
LT
45
46#include <asm/uaccess.h>
47
48#include <net/ip_vs.h>
49
50/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
14cc3e2b 51static DEFINE_MUTEX(__ip_vs_mutex);
1da177e4
LT
52
53/* lock for service table */
54static DEFINE_RWLOCK(__ip_vs_svc_lock);
55
56/* lock for table with the real services */
57static DEFINE_RWLOCK(__ip_vs_rs_lock);
58
59/* lock for state and timeout tables */
60static DEFINE_RWLOCK(__ip_vs_securetcp_lock);
61
62/* lock for drop entry handling */
63static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
64
65/* lock for drop packet handling */
66static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
67
68/* 1/rate drop and drop-entry variables */
69int ip_vs_drop_rate = 0;
70int ip_vs_drop_counter = 0;
71static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
72
73/* number of virtual services */
74static int ip_vs_num_services = 0;
75
76/* sysctl variables */
77static int sysctl_ip_vs_drop_entry = 0;
78static int sysctl_ip_vs_drop_packet = 0;
79static int sysctl_ip_vs_secure_tcp = 0;
80static int sysctl_ip_vs_amemthresh = 1024;
81static int sysctl_ip_vs_am_droprate = 10;
82int sysctl_ip_vs_cache_bypass = 0;
83int sysctl_ip_vs_expire_nodest_conn = 0;
84int sysctl_ip_vs_expire_quiescent_template = 0;
85int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
86int sysctl_ip_vs_nat_icmp_send = 0;
87
88
#ifdef CONFIG_IP_VS_DEBUG
/* Debug level; zero until something stores a different value. */
static int sysctl_ip_vs_debug_level;

/* Accessor returning the current value of sysctl_ip_vs_debug_level. */
int ip_vs_get_debug_level(void)
{
	int level = sysctl_ip_vs_debug_level;

	return level;
}
#endif
97
09571c7a
VB
98#ifdef CONFIG_IP_VS_IPV6
99/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
100static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
101{
102 struct rt6_info *rt;
103 struct flowi fl = {
104 .oif = 0,
105 .nl_u = {
106 .ip6_u = {
107 .daddr = *addr,
108 .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
109 };
110
111 rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
112 if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
113 return 1;
114
115 return 0;
116}
117#endif
1da177e4 118/*
af9debd4
JA
119 * update_defense_level is called from keventd and from sysctl,
120 * so it needs to protect itself from softirqs
1da177e4
LT
121 */
122static void update_defense_level(void)
123{
124 struct sysinfo i;
125 static int old_secure_tcp = 0;
126 int availmem;
127 int nomem;
128 int to_change = -1;
129
130 /* we only count free and buffered memory (in pages) */
131 si_meminfo(&i);
132 availmem = i.freeram + i.bufferram;
133 /* however in linux 2.5 the i.bufferram is total page cache size,
134 we need adjust it */
135 /* si_swapinfo(&i); */
136 /* availmem = availmem - (i.totalswap - i.freeswap); */
137
138 nomem = (availmem < sysctl_ip_vs_amemthresh);
139
af9debd4
JA
140 local_bh_disable();
141
1da177e4
LT
142 /* drop_entry */
143 spin_lock(&__ip_vs_dropentry_lock);
144 switch (sysctl_ip_vs_drop_entry) {
145 case 0:
146 atomic_set(&ip_vs_dropentry, 0);
147 break;
148 case 1:
149 if (nomem) {
150 atomic_set(&ip_vs_dropentry, 1);
151 sysctl_ip_vs_drop_entry = 2;
152 } else {
153 atomic_set(&ip_vs_dropentry, 0);
154 }
155 break;
156 case 2:
157 if (nomem) {
158 atomic_set(&ip_vs_dropentry, 1);
159 } else {
160 atomic_set(&ip_vs_dropentry, 0);
161 sysctl_ip_vs_drop_entry = 1;
162 };
163 break;
164 case 3:
165 atomic_set(&ip_vs_dropentry, 1);
166 break;
167 }
168 spin_unlock(&__ip_vs_dropentry_lock);
169
170 /* drop_packet */
171 spin_lock(&__ip_vs_droppacket_lock);
172 switch (sysctl_ip_vs_drop_packet) {
173 case 0:
174 ip_vs_drop_rate = 0;
175 break;
176 case 1:
177 if (nomem) {
178 ip_vs_drop_rate = ip_vs_drop_counter
179 = sysctl_ip_vs_amemthresh /
180 (sysctl_ip_vs_amemthresh-availmem);
181 sysctl_ip_vs_drop_packet = 2;
182 } else {
183 ip_vs_drop_rate = 0;
184 }
185 break;
186 case 2:
187 if (nomem) {
188 ip_vs_drop_rate = ip_vs_drop_counter
189 = sysctl_ip_vs_amemthresh /
190 (sysctl_ip_vs_amemthresh-availmem);
191 } else {
192 ip_vs_drop_rate = 0;
193 sysctl_ip_vs_drop_packet = 1;
194 }
195 break;
196 case 3:
197 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
198 break;
199 }
200 spin_unlock(&__ip_vs_droppacket_lock);
201
202 /* secure_tcp */
203 write_lock(&__ip_vs_securetcp_lock);
204 switch (sysctl_ip_vs_secure_tcp) {
205 case 0:
206 if (old_secure_tcp >= 2)
207 to_change = 0;
208 break;
209 case 1:
210 if (nomem) {
211 if (old_secure_tcp < 2)
212 to_change = 1;
213 sysctl_ip_vs_secure_tcp = 2;
214 } else {
215 if (old_secure_tcp >= 2)
216 to_change = 0;
217 }
218 break;
219 case 2:
220 if (nomem) {
221 if (old_secure_tcp < 2)
222 to_change = 1;
223 } else {
224 if (old_secure_tcp >= 2)
225 to_change = 0;
226 sysctl_ip_vs_secure_tcp = 1;
227 }
228 break;
229 case 3:
230 if (old_secure_tcp < 2)
231 to_change = 1;
232 break;
233 }
234 old_secure_tcp = sysctl_ip_vs_secure_tcp;
235 if (to_change >= 0)
236 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
237 write_unlock(&__ip_vs_securetcp_lock);
af9debd4
JA
238
239 local_bh_enable();
1da177e4
LT
240}
241
242
243/*
244 * Timer for checking the defense
245 */
246#define DEFENSE_TIMER_PERIOD 1*HZ
c4028958
DH
247static void defense_work_handler(struct work_struct *work);
248static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
1da177e4 249
c4028958 250static void defense_work_handler(struct work_struct *work)
1da177e4
LT
251{
252 update_defense_level();
253 if (atomic_read(&ip_vs_dropentry))
254 ip_vs_random_dropentry();
255
256 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
257}
258
259int
260ip_vs_use_count_inc(void)
261{
262 return try_module_get(THIS_MODULE);
263}
264
265void
266ip_vs_use_count_dec(void)
267{
268 module_put(THIS_MODULE);
269}
270
271
272/*
273 * Hash table: for virtual service lookups
274 */
275#define IP_VS_SVC_TAB_BITS 8
276#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
277#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
278
279/* the service table hashed by <protocol, addr, port> */
280static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
281/* the service table hashed by fwmark */
282static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
283
284/*
285 * Hash table: for real service lookups
286 */
287#define IP_VS_RTAB_BITS 4
288#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
289#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
290
291static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
292
293/*
294 * Trash for destinations
295 */
296static LIST_HEAD(ip_vs_dest_trash);
297
298/*
299 * FTP & NULL virtual service counters
300 */
301static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
302static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
303
304
305/*
306 * Returns hash value for virtual service
307 */
308static __inline__ unsigned
b18610de
JV
309ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
310 __be16 port)
1da177e4
LT
311{
312 register unsigned porth = ntohs(port);
b18610de 313 __be32 addr_fold = addr->ip;
1da177e4 314
b18610de
JV
315#ifdef CONFIG_IP_VS_IPV6
316 if (af == AF_INET6)
317 addr_fold = addr->ip6[0]^addr->ip6[1]^
318 addr->ip6[2]^addr->ip6[3];
319#endif
320
321 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
1da177e4
LT
322 & IP_VS_SVC_TAB_MASK;
323}
324
325/*
326 * Returns hash value of fwmark for virtual service lookup
327 */
328static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
329{
330 return fwmark & IP_VS_SVC_TAB_MASK;
331}
332
333/*
334 * Hashes a service in the ip_vs_svc_table by <proto,addr,port>
335 * or in the ip_vs_svc_fwm_table by fwmark.
336 * Should be called with locked tables.
337 */
338static int ip_vs_svc_hash(struct ip_vs_service *svc)
339{
340 unsigned hash;
341
342 if (svc->flags & IP_VS_SVC_F_HASHED) {
343 IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
344 "called from %p\n", __builtin_return_address(0));
345 return 0;
346 }
347
348 if (svc->fwmark == 0) {
349 /*
350 * Hash it by <protocol,addr,port> in ip_vs_svc_table
351 */
b18610de 352 hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
e7ade46a 353 svc->port);
1da177e4
LT
354 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
355 } else {
356 /*
357 * Hash it by fwmark in ip_vs_svc_fwm_table
358 */
359 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
360 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
361 }
362
363 svc->flags |= IP_VS_SVC_F_HASHED;
364 /* increase its refcnt because it is referenced by the svc table */
365 atomic_inc(&svc->refcnt);
366 return 1;
367}
368
369
370/*
371 * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
372 * Should be called with locked tables.
373 */
374static int ip_vs_svc_unhash(struct ip_vs_service *svc)
375{
376 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
377 IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
378 "called from %p\n", __builtin_return_address(0));
379 return 0;
380 }
381
382 if (svc->fwmark == 0) {
383 /* Remove it from the ip_vs_svc_table table */
384 list_del(&svc->s_list);
385 } else {
386 /* Remove it from the ip_vs_svc_fwm_table table */
387 list_del(&svc->f_list);
388 }
389
390 svc->flags &= ~IP_VS_SVC_F_HASHED;
391 atomic_dec(&svc->refcnt);
392 return 1;
393}
394
395
396/*
397 * Get service by {proto,addr,port} in the service table.
398 */
b18610de
JV
399static inline struct ip_vs_service *
400__ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
401 __be16 vport)
1da177e4
LT
402{
403 unsigned hash;
404 struct ip_vs_service *svc;
405
406 /* Check for "full" addressed entries */
b18610de 407 hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
1da177e4
LT
408
409 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
b18610de
JV
410 if ((svc->af == af)
411 && ip_vs_addr_equal(af, &svc->addr, vaddr)
1da177e4
LT
412 && (svc->port == vport)
413 && (svc->protocol == protocol)) {
414 /* HIT */
415 atomic_inc(&svc->usecnt);
416 return svc;
417 }
418 }
419
420 return NULL;
421}
422
423
424/*
425 * Get service by {fwmark} in the service table.
426 */
b18610de
JV
427static inline struct ip_vs_service *
428__ip_vs_svc_fwm_get(int af, __u32 fwmark)
1da177e4
LT
429{
430 unsigned hash;
431 struct ip_vs_service *svc;
432
433 /* Check for fwmark addressed entries */
434 hash = ip_vs_svc_fwm_hashkey(fwmark);
435
436 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
b18610de 437 if (svc->fwmark == fwmark && svc->af == af) {
1da177e4
LT
438 /* HIT */
439 atomic_inc(&svc->usecnt);
440 return svc;
441 }
442 }
443
444 return NULL;
445}
446
447struct ip_vs_service *
3c2e0505
JV
448ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
449 const union nf_inet_addr *vaddr, __be16 vport)
1da177e4
LT
450{
451 struct ip_vs_service *svc;
3c2e0505 452
1da177e4
LT
453 read_lock(&__ip_vs_svc_lock);
454
455 /*
456 * Check the table hashed by fwmark first
457 */
3c2e0505 458 if (fwmark && (svc = __ip_vs_svc_fwm_get(af, fwmark)))
1da177e4
LT
459 goto out;
460
461 /*
462 * Check the table hashed by <protocol,addr,port>
463 * for "full" addressed entries
464 */
3c2e0505 465 svc = __ip_vs_service_get(af, protocol, vaddr, vport);
1da177e4
LT
466
467 if (svc == NULL
468 && protocol == IPPROTO_TCP
469 && atomic_read(&ip_vs_ftpsvc_counter)
470 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
471 /*
472 * Check if ftp service entry exists, the packet
473 * might belong to FTP data connections.
474 */
3c2e0505 475 svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT);
1da177e4
LT
476 }
477
478 if (svc == NULL
479 && atomic_read(&ip_vs_nullsvc_counter)) {
480 /*
481 * Check if the catch-all port (port zero) exists
482 */
3c2e0505 483 svc = __ip_vs_service_get(af, protocol, vaddr, 0);
1da177e4
LT
484 }
485
486 out:
487 read_unlock(&__ip_vs_svc_lock);
488
3c2e0505
JV
489 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
490 fwmark, ip_vs_proto_name(protocol),
491 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
492 svc ? "hit" : "not hit");
1da177e4
LT
493
494 return svc;
495}
496
497
498static inline void
499__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
500{
501 atomic_inc(&svc->refcnt);
502 dest->svc = svc;
503}
504
505static inline void
506__ip_vs_unbind_svc(struct ip_vs_dest *dest)
507{
508 struct ip_vs_service *svc = dest->svc;
509
510 dest->svc = NULL;
511 if (atomic_dec_and_test(&svc->refcnt))
512 kfree(svc);
513}
514
515
516/*
517 * Returns hash value for real service
518 */
7937df15
JV
519static inline unsigned ip_vs_rs_hashkey(int af,
520 const union nf_inet_addr *addr,
521 __be16 port)
1da177e4
LT
522{
523 register unsigned porth = ntohs(port);
7937df15
JV
524 __be32 addr_fold = addr->ip;
525
526#ifdef CONFIG_IP_VS_IPV6
527 if (af == AF_INET6)
528 addr_fold = addr->ip6[0]^addr->ip6[1]^
529 addr->ip6[2]^addr->ip6[3];
530#endif
1da177e4 531
7937df15 532 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
1da177e4
LT
533 & IP_VS_RTAB_MASK;
534}
535
536/*
537 * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
538 * should be called with locked tables.
539 */
540static int ip_vs_rs_hash(struct ip_vs_dest *dest)
541{
542 unsigned hash;
543
544 if (!list_empty(&dest->d_list)) {
545 return 0;
546 }
547
548 /*
549 * Hash by proto,addr,port,
550 * which are the parameters of the real service.
551 */
7937df15
JV
552 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
553
1da177e4
LT
554 list_add(&dest->d_list, &ip_vs_rtable[hash]);
555
556 return 1;
557}
558
559/*
560 * UNhashes ip_vs_dest from ip_vs_rtable.
561 * should be called with locked tables.
562 */
563static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
564{
565 /*
566 * Remove it from the ip_vs_rtable table.
567 */
568 if (!list_empty(&dest->d_list)) {
569 list_del(&dest->d_list);
570 INIT_LIST_HEAD(&dest->d_list);
571 }
572
573 return 1;
574}
575
576/*
577 * Lookup real service by <proto,addr,port> in the real service table.
578 */
579struct ip_vs_dest *
7937df15
JV
580ip_vs_lookup_real_service(int af, __u16 protocol,
581 const union nf_inet_addr *daddr,
582 __be16 dport)
1da177e4
LT
583{
584 unsigned hash;
585 struct ip_vs_dest *dest;
586
587 /*
588 * Check for "full" addressed entries
589 * Return the first found entry
590 */
7937df15 591 hash = ip_vs_rs_hashkey(af, daddr, dport);
1da177e4
LT
592
593 read_lock(&__ip_vs_rs_lock);
594 list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
7937df15
JV
595 if ((dest->af == af)
596 && ip_vs_addr_equal(af, &dest->addr, daddr)
1da177e4
LT
597 && (dest->port == dport)
598 && ((dest->protocol == protocol) ||
599 dest->vfwmark)) {
600 /* HIT */
601 read_unlock(&__ip_vs_rs_lock);
602 return dest;
603 }
604 }
605 read_unlock(&__ip_vs_rs_lock);
606
607 return NULL;
608}
609
610/*
611 * Lookup destination by {addr,port} in the given service
612 */
613static struct ip_vs_dest *
7937df15
JV
614ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
615 __be16 dport)
1da177e4
LT
616{
617 struct ip_vs_dest *dest;
618
619 /*
620 * Find the destination for the given service
621 */
622 list_for_each_entry(dest, &svc->destinations, n_list) {
7937df15
JV
623 if ((dest->af == svc->af)
624 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
625 && (dest->port == dport)) {
1da177e4
LT
626 /* HIT */
627 return dest;
628 }
629 }
630
631 return NULL;
632}
633
1e356f9c
RB
634/*
635 * Find destination by {daddr,dport,vaddr,protocol}
636 * Cretaed to be used in ip_vs_process_message() in
637 * the backup synchronization daemon. It finds the
638 * destination to be bound to the received connection
639 * on the backup.
640 *
641 * ip_vs_lookup_real_service() looked promissing, but
642 * seems not working as expected.
643 */
7937df15
JV
644struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
645 __be16 dport,
646 const union nf_inet_addr *vaddr,
647 __be16 vport, __u16 protocol)
1e356f9c
RB
648{
649 struct ip_vs_dest *dest;
650 struct ip_vs_service *svc;
651
7937df15 652 svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
1e356f9c
RB
653 if (!svc)
654 return NULL;
655 dest = ip_vs_lookup_dest(svc, daddr, dport);
656 if (dest)
657 atomic_inc(&dest->refcnt);
658 ip_vs_service_put(svc);
659 return dest;
660}
1da177e4
LT
661
662/*
663 * Lookup dest by {svc,addr,port} in the destination trash.
664 * The destination trash is used to hold the destinations that are removed
665 * from the service table but are still referenced by some conn entries.
666 * The reason to add the destination trash is when the dest is temporary
667 * down (either by administrator or by monitor program), the dest can be
668 * picked back from the trash, the remaining connections to the dest can
669 * continue, and the counting information of the dest is also useful for
670 * scheduling.
671 */
672static struct ip_vs_dest *
7937df15
JV
673ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
674 __be16 dport)
1da177e4
LT
675{
676 struct ip_vs_dest *dest, *nxt;
677
678 /*
679 * Find the destination in trash
680 */
681 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
7937df15
JV
682 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
683 "dest->refcnt=%d\n",
684 dest->vfwmark,
685 IP_VS_DBG_ADDR(svc->af, &dest->addr),
686 ntohs(dest->port),
687 atomic_read(&dest->refcnt));
688 if (dest->af == svc->af &&
689 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
1da177e4
LT
690 dest->port == dport &&
691 dest->vfwmark == svc->fwmark &&
692 dest->protocol == svc->protocol &&
693 (svc->fwmark ||
7937df15 694 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
1da177e4
LT
695 dest->vport == svc->port))) {
696 /* HIT */
697 return dest;
698 }
699
700 /*
701 * Try to purge the destination from trash if not referenced
702 */
703 if (atomic_read(&dest->refcnt) == 1) {
7937df15
JV
704 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
705 "from trash\n",
706 dest->vfwmark,
707 IP_VS_DBG_ADDR(svc->af, &dest->addr),
708 ntohs(dest->port));
1da177e4
LT
709 list_del(&dest->n_list);
710 ip_vs_dst_reset(dest);
711 __ip_vs_unbind_svc(dest);
712 kfree(dest);
713 }
714 }
715
716 return NULL;
717}
718
719
720/*
721 * Clean up all the destinations in the trash
722 * Called by the ip_vs_control_cleanup()
723 *
724 * When the ip_vs_control_clearup is activated by ipvs module exit,
725 * the service tables must have been flushed and all the connections
726 * are expired, and the refcnt of each destination in the trash must
727 * be 1, so we simply release them here.
728 */
729static void ip_vs_trash_cleanup(void)
730{
731 struct ip_vs_dest *dest, *nxt;
732
733 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
734 list_del(&dest->n_list);
735 ip_vs_dst_reset(dest);
736 __ip_vs_unbind_svc(dest);
737 kfree(dest);
738 }
739}
740
741
742static void
743ip_vs_zero_stats(struct ip_vs_stats *stats)
744{
745 spin_lock_bh(&stats->lock);
e93615d0 746
e9c0ce23 747 memset(&stats->ustats, 0, sizeof(stats->ustats));
1da177e4 748 ip_vs_zero_estimator(stats);
e93615d0 749
3a14a313 750 spin_unlock_bh(&stats->lock);
1da177e4
LT
751}
752
753/*
754 * Update a destination in the given service
755 */
756static void
757__ip_vs_update_dest(struct ip_vs_service *svc,
c860c6b1 758 struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
759{
760 int conn_flags;
761
762 /* set the weight and the flags */
763 atomic_set(&dest->weight, udest->weight);
764 conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
765
766 /* check if local node and update the flags */
09571c7a
VB
767#ifdef CONFIG_IP_VS_IPV6
768 if (svc->af == AF_INET6) {
769 if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) {
770 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
771 | IP_VS_CONN_F_LOCALNODE;
772 }
773 } else
774#endif
775 if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
776 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
777 | IP_VS_CONN_F_LOCALNODE;
778 }
1da177e4
LT
779
780 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
781 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
782 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
783 } else {
784 /*
785 * Put the real service in ip_vs_rtable if not present.
786 * For now only for NAT!
787 */
788 write_lock_bh(&__ip_vs_rs_lock);
789 ip_vs_rs_hash(dest);
790 write_unlock_bh(&__ip_vs_rs_lock);
791 }
792 atomic_set(&dest->conn_flags, conn_flags);
793
794 /* bind the service */
795 if (!dest->svc) {
796 __ip_vs_bind_svc(dest, svc);
797 } else {
798 if (dest->svc != svc) {
799 __ip_vs_unbind_svc(dest);
800 ip_vs_zero_stats(&dest->stats);
801 __ip_vs_bind_svc(dest, svc);
802 }
803 }
804
805 /* set the dest status flags */
806 dest->flags |= IP_VS_DEST_F_AVAILABLE;
807
808 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
809 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
810 dest->u_threshold = udest->u_threshold;
811 dest->l_threshold = udest->l_threshold;
812}
813
814
815/*
816 * Create a destination for the given service
817 */
818static int
c860c6b1 819ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
1da177e4
LT
820 struct ip_vs_dest **dest_p)
821{
822 struct ip_vs_dest *dest;
823 unsigned atype;
824
825 EnterFunction(2);
826
09571c7a
VB
827#ifdef CONFIG_IP_VS_IPV6
828 if (svc->af == AF_INET6) {
829 atype = ipv6_addr_type(&udest->addr.in6);
3bfb92f4
SW
830 if ((!(atype & IPV6_ADDR_UNICAST) ||
831 atype & IPV6_ADDR_LINKLOCAL) &&
09571c7a
VB
832 !__ip_vs_addr_is_local_v6(&udest->addr.in6))
833 return -EINVAL;
834 } else
835#endif
836 {
837 atype = inet_addr_type(&init_net, udest->addr.ip);
838 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
839 return -EINVAL;
840 }
1da177e4 841
0da974f4 842 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
1da177e4
LT
843 if (dest == NULL) {
844 IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
845 return -ENOMEM;
846 }
1da177e4 847
c860c6b1 848 dest->af = svc->af;
1da177e4 849 dest->protocol = svc->protocol;
c860c6b1 850 dest->vaddr = svc->addr;
1da177e4
LT
851 dest->vport = svc->port;
852 dest->vfwmark = svc->fwmark;
c860c6b1 853 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
1da177e4
LT
854 dest->port = udest->port;
855
856 atomic_set(&dest->activeconns, 0);
857 atomic_set(&dest->inactconns, 0);
858 atomic_set(&dest->persistconns, 0);
859 atomic_set(&dest->refcnt, 0);
860
861 INIT_LIST_HEAD(&dest->d_list);
862 spin_lock_init(&dest->dst_lock);
863 spin_lock_init(&dest->stats.lock);
864 __ip_vs_update_dest(svc, dest, udest);
865 ip_vs_new_estimator(&dest->stats);
866
867 *dest_p = dest;
868
869 LeaveFunction(2);
870 return 0;
871}
872
873
874/*
875 * Add a destination into an existing service
876 */
877static int
c860c6b1 878ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
879{
880 struct ip_vs_dest *dest;
c860c6b1 881 union nf_inet_addr daddr;
014d730d 882 __be16 dport = udest->port;
1da177e4
LT
883 int ret;
884
885 EnterFunction(2);
886
887 if (udest->weight < 0) {
888 IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
889 return -ERANGE;
890 }
891
892 if (udest->l_threshold > udest->u_threshold) {
893 IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than "
894 "upper threshold\n");
895 return -ERANGE;
896 }
897
c860c6b1
JV
898 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
899
1da177e4
LT
900 /*
901 * Check if the dest already exists in the list
902 */
7937df15
JV
903 dest = ip_vs_lookup_dest(svc, &daddr, dport);
904
1da177e4
LT
905 if (dest != NULL) {
906 IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
907 return -EEXIST;
908 }
909
910 /*
911 * Check if the dest already exists in the trash and
912 * is from the same service
913 */
7937df15
JV
914 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
915
1da177e4 916 if (dest != NULL) {
cfc78c5a
JV
917 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
918 "dest->refcnt=%d, service %u/%s:%u\n",
919 IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
920 atomic_read(&dest->refcnt),
921 dest->vfwmark,
922 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
923 ntohs(dest->vport));
924
1da177e4
LT
925 __ip_vs_update_dest(svc, dest, udest);
926
927 /*
928 * Get the destination from the trash
929 */
930 list_del(&dest->n_list);
931
932 ip_vs_new_estimator(&dest->stats);
933
934 write_lock_bh(&__ip_vs_svc_lock);
935
936 /*
937 * Wait until all other svc users go away.
938 */
939 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
940
941 list_add(&dest->n_list, &svc->destinations);
942 svc->num_dests++;
943
944 /* call the update_service function of its scheduler */
82dfb6f3
SW
945 if (svc->scheduler->update_service)
946 svc->scheduler->update_service(svc);
1da177e4
LT
947
948 write_unlock_bh(&__ip_vs_svc_lock);
949 return 0;
950 }
951
952 /*
953 * Allocate and initialize the dest structure
954 */
955 ret = ip_vs_new_dest(svc, udest, &dest);
956 if (ret) {
957 return ret;
958 }
959
960 /*
961 * Add the dest entry into the list
962 */
963 atomic_inc(&dest->refcnt);
964
965 write_lock_bh(&__ip_vs_svc_lock);
966
967 /*
968 * Wait until all other svc users go away.
969 */
970 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
971
972 list_add(&dest->n_list, &svc->destinations);
973 svc->num_dests++;
974
975 /* call the update_service function of its scheduler */
82dfb6f3
SW
976 if (svc->scheduler->update_service)
977 svc->scheduler->update_service(svc);
1da177e4
LT
978
979 write_unlock_bh(&__ip_vs_svc_lock);
980
981 LeaveFunction(2);
982
983 return 0;
984}
985
986
987/*
988 * Edit a destination in the given service
989 */
990static int
c860c6b1 991ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
992{
993 struct ip_vs_dest *dest;
c860c6b1 994 union nf_inet_addr daddr;
014d730d 995 __be16 dport = udest->port;
1da177e4
LT
996
997 EnterFunction(2);
998
999 if (udest->weight < 0) {
1000 IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
1001 return -ERANGE;
1002 }
1003
1004 if (udest->l_threshold > udest->u_threshold) {
1005 IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than "
1006 "upper threshold\n");
1007 return -ERANGE;
1008 }
1009
c860c6b1
JV
1010 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
1011
1da177e4
LT
1012 /*
1013 * Lookup the destination list
1014 */
7937df15
JV
1015 dest = ip_vs_lookup_dest(svc, &daddr, dport);
1016
1da177e4
LT
1017 if (dest == NULL) {
1018 IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
1019 return -ENOENT;
1020 }
1021
1022 __ip_vs_update_dest(svc, dest, udest);
1023
1024 write_lock_bh(&__ip_vs_svc_lock);
1025
1026 /* Wait until all other svc users go away */
cae7ca3d 1027 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1da177e4
LT
1028
1029 /* call the update_service, because server weight may be changed */
82dfb6f3
SW
1030 if (svc->scheduler->update_service)
1031 svc->scheduler->update_service(svc);
1da177e4
LT
1032
1033 write_unlock_bh(&__ip_vs_svc_lock);
1034
1035 LeaveFunction(2);
1036
1037 return 0;
1038}
1039
1040
1041/*
1042 * Delete a destination (must be already unlinked from the service)
1043 */
1044static void __ip_vs_del_dest(struct ip_vs_dest *dest)
1045{
1046 ip_vs_kill_estimator(&dest->stats);
1047
1048 /*
1049 * Remove it from the d-linked list with the real services.
1050 */
1051 write_lock_bh(&__ip_vs_rs_lock);
1052 ip_vs_rs_unhash(dest);
1053 write_unlock_bh(&__ip_vs_rs_lock);
1054
1055 /*
1056 * Decrease the refcnt of the dest, and free the dest
1057 * if nobody refers to it (refcnt=0). Otherwise, throw
1058 * the destination into the trash.
1059 */
1060 if (atomic_dec_and_test(&dest->refcnt)) {
1061 ip_vs_dst_reset(dest);
1062 /* simply decrease svc->refcnt here, let the caller check
1063 and release the service if nobody refers to it.
1064 Only user context can release destination and service,
1065 and only one user context can update virtual service at a
1066 time, so the operation here is OK */
1067 atomic_dec(&dest->svc->refcnt);
1068 kfree(dest);
1069 } else {
cfc78c5a
JV
1070 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1071 "dest->refcnt=%d\n",
1072 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1073 ntohs(dest->port),
1074 atomic_read(&dest->refcnt));
1da177e4
LT
1075 list_add(&dest->n_list, &ip_vs_dest_trash);
1076 atomic_inc(&dest->refcnt);
1077 }
1078}
1079
1080
1081/*
1082 * Unlink a destination from the given service
1083 */
1084static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1085 struct ip_vs_dest *dest,
1086 int svcupd)
1087{
1088 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1089
1090 /*
1091 * Remove it from the d-linked destination list.
1092 */
1093 list_del(&dest->n_list);
1094 svc->num_dests--;
82dfb6f3
SW
1095
1096 /*
1097 * Call the update_service function of its scheduler
1098 */
1099 if (svcupd && svc->scheduler->update_service)
1100 svc->scheduler->update_service(svc);
1da177e4
LT
1101}
1102
1103
1104/*
1105 * Delete a destination server in the given service
1106 */
1107static int
c860c6b1 1108ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
1109{
1110 struct ip_vs_dest *dest;
014d730d 1111 __be16 dport = udest->port;
1da177e4
LT
1112
1113 EnterFunction(2);
1114
7937df15 1115 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
c860c6b1 1116
1da177e4
LT
1117 if (dest == NULL) {
1118 IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
1119 return -ENOENT;
1120 }
1121
1122 write_lock_bh(&__ip_vs_svc_lock);
1123
1124 /*
1125 * Wait until all other svc users go away.
1126 */
1127 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1128
1129 /*
1130 * Unlink dest from the service
1131 */
1132 __ip_vs_unlink_dest(svc, dest, 1);
1133
1134 write_unlock_bh(&__ip_vs_svc_lock);
1135
1136 /*
1137 * Delete the destination
1138 */
1139 __ip_vs_del_dest(dest);
1140
1141 LeaveFunction(2);
1142
1143 return 0;
1144}
1145
1146
1147/*
1148 * Add a service into the service hash table
1149 */
1150static int
c860c6b1
JV
1151ip_vs_add_service(struct ip_vs_service_user_kern *u,
1152 struct ip_vs_service **svc_p)
1da177e4
LT
1153{
1154 int ret = 0;
1155 struct ip_vs_scheduler *sched = NULL;
1156 struct ip_vs_service *svc = NULL;
1157
1158 /* increase the module use count */
1159 ip_vs_use_count_inc();
1160
1161 /* Lookup the scheduler by 'u->sched_name' */
1162 sched = ip_vs_scheduler_get(u->sched_name);
1163 if (sched == NULL) {
1164 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1165 u->sched_name);
1166 ret = -ENOENT;
1167 goto out_mod_dec;
1168 }
1169
f94fd041
JV
1170#ifdef CONFIG_IP_VS_IPV6
1171 if (u->af == AF_INET6) {
1172 if (!sched->supports_ipv6) {
1173 ret = -EAFNOSUPPORT;
1174 goto out_err;
1175 }
1176 if ((u->netmask < 1) || (u->netmask > 128)) {
1177 ret = -EINVAL;
1178 goto out_err;
1179 }
1180 }
1181#endif
1182
0da974f4 1183 svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
1da177e4
LT
1184 if (svc == NULL) {
1185 IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
1186 ret = -ENOMEM;
1187 goto out_err;
1188 }
1da177e4
LT
1189
1190 /* I'm the first user of the service */
1191 atomic_set(&svc->usecnt, 1);
1192 atomic_set(&svc->refcnt, 0);
1193
c860c6b1 1194 svc->af = u->af;
1da177e4 1195 svc->protocol = u->protocol;
c860c6b1 1196 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1da177e4
LT
1197 svc->port = u->port;
1198 svc->fwmark = u->fwmark;
1199 svc->flags = u->flags;
1200 svc->timeout = u->timeout * HZ;
1201 svc->netmask = u->netmask;
1202
1203 INIT_LIST_HEAD(&svc->destinations);
1204 rwlock_init(&svc->sched_lock);
1205 spin_lock_init(&svc->stats.lock);
1206
1207 /* Bind the scheduler */
1208 ret = ip_vs_bind_scheduler(svc, sched);
1209 if (ret)
1210 goto out_err;
1211 sched = NULL;
1212
1213 /* Update the virtual service counters */
1214 if (svc->port == FTPPORT)
1215 atomic_inc(&ip_vs_ftpsvc_counter);
1216 else if (svc->port == 0)
1217 atomic_inc(&ip_vs_nullsvc_counter);
1218
1219 ip_vs_new_estimator(&svc->stats);
f94fd041
JV
1220
1221 /* Count only IPv4 services for old get/setsockopt interface */
1222 if (svc->af == AF_INET)
1223 ip_vs_num_services++;
1da177e4
LT
1224
1225 /* Hash the service into the service table */
1226 write_lock_bh(&__ip_vs_svc_lock);
1227 ip_vs_svc_hash(svc);
1228 write_unlock_bh(&__ip_vs_svc_lock);
1229
1230 *svc_p = svc;
1231 return 0;
1232
1233 out_err:
1234 if (svc != NULL) {
1235 if (svc->scheduler)
1236 ip_vs_unbind_scheduler(svc);
1237 if (svc->inc) {
1238 local_bh_disable();
1239 ip_vs_app_inc_put(svc->inc);
1240 local_bh_enable();
1241 }
1242 kfree(svc);
1243 }
1244 ip_vs_scheduler_put(sched);
1245
1246 out_mod_dec:
1247 /* decrease the module use count */
1248 ip_vs_use_count_dec();
1249
1250 return ret;
1251}
1252
1253
1254/*
1255 * Edit a service and bind it with a new scheduler
1256 */
1257static int
c860c6b1 1258ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1da177e4
LT
1259{
1260 struct ip_vs_scheduler *sched, *old_sched;
1261 int ret = 0;
1262
1263 /*
1264 * Lookup the scheduler, by 'u->sched_name'
1265 */
1266 sched = ip_vs_scheduler_get(u->sched_name);
1267 if (sched == NULL) {
1268 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1269 u->sched_name);
1270 return -ENOENT;
1271 }
1272 old_sched = sched;
1273
f94fd041
JV
1274#ifdef CONFIG_IP_VS_IPV6
1275 if (u->af == AF_INET6) {
1276 if (!sched->supports_ipv6) {
a5ba4bf2 1277 ret = -EAFNOSUPPORT;
f94fd041
JV
1278 goto out;
1279 }
1280 if ((u->netmask < 1) || (u->netmask > 128)) {
a5ba4bf2 1281 ret = -EINVAL;
f94fd041
JV
1282 goto out;
1283 }
1284 }
1285#endif
1286
1da177e4
LT
1287 write_lock_bh(&__ip_vs_svc_lock);
1288
1289 /*
1290 * Wait until all other svc users go away.
1291 */
1292 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1293
1294 /*
1295 * Set the flags and timeout value
1296 */
1297 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1298 svc->timeout = u->timeout * HZ;
1299 svc->netmask = u->netmask;
1300
1301 old_sched = svc->scheduler;
1302 if (sched != old_sched) {
1303 /*
1304 * Unbind the old scheduler
1305 */
1306 if ((ret = ip_vs_unbind_scheduler(svc))) {
1307 old_sched = sched;
9e691ed6 1308 goto out_unlock;
1da177e4
LT
1309 }
1310
1311 /*
1312 * Bind the new scheduler
1313 */
1314 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1315 /*
1316 * If ip_vs_bind_scheduler fails, restore the old
1317 * scheduler.
1318 * The main reason of failure is out of memory.
1319 *
1320 * The question is if the old scheduler can be
1321 * restored all the time. TODO: if it cannot be
1322 * restored some time, we must delete the service,
1323 * otherwise the system may crash.
1324 */
1325 ip_vs_bind_scheduler(svc, old_sched);
1326 old_sched = sched;
9e691ed6 1327 goto out_unlock;
1da177e4
LT
1328 }
1329 }
1330
9e691ed6 1331 out_unlock:
1da177e4 1332 write_unlock_bh(&__ip_vs_svc_lock);
9e691ed6 1333 out:
1da177e4
LT
1334
1335 if (old_sched)
1336 ip_vs_scheduler_put(old_sched);
1337
1338 return ret;
1339}
1340
1341
1342/*
1343 * Delete a service from the service list
1344 * - The service must be unlinked, unlocked and not referenced!
1345 * - We are called under _bh lock
1346 */
1347static void __ip_vs_del_service(struct ip_vs_service *svc)
1348{
1349 struct ip_vs_dest *dest, *nxt;
1350 struct ip_vs_scheduler *old_sched;
1351
f94fd041
JV
1352 /* Count only IPv4 services for old get/setsockopt interface */
1353 if (svc->af == AF_INET)
1354 ip_vs_num_services--;
1355
1da177e4
LT
1356 ip_vs_kill_estimator(&svc->stats);
1357
1358 /* Unbind scheduler */
1359 old_sched = svc->scheduler;
1360 ip_vs_unbind_scheduler(svc);
1361 if (old_sched)
1362 ip_vs_scheduler_put(old_sched);
1363
1364 /* Unbind app inc */
1365 if (svc->inc) {
1366 ip_vs_app_inc_put(svc->inc);
1367 svc->inc = NULL;
1368 }
1369
1370 /*
1371 * Unlink the whole destination list
1372 */
1373 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1374 __ip_vs_unlink_dest(svc, dest, 0);
1375 __ip_vs_del_dest(dest);
1376 }
1377
1378 /*
1379 * Update the virtual service counters
1380 */
1381 if (svc->port == FTPPORT)
1382 atomic_dec(&ip_vs_ftpsvc_counter);
1383 else if (svc->port == 0)
1384 atomic_dec(&ip_vs_nullsvc_counter);
1385
1386 /*
1387 * Free the service if nobody refers to it
1388 */
1389 if (atomic_read(&svc->refcnt) == 0)
1390 kfree(svc);
1391
1392 /* decrease the module use count */
1393 ip_vs_use_count_dec();
1394}
1395
1396/*
1397 * Delete a service from the service list
1398 */
1399static int ip_vs_del_service(struct ip_vs_service *svc)
1400{
1401 if (svc == NULL)
1402 return -EEXIST;
1403
1404 /*
1405 * Unhash it from the service table
1406 */
1407 write_lock_bh(&__ip_vs_svc_lock);
1408
1409 ip_vs_svc_unhash(svc);
1410
1411 /*
1412 * Wait until all the svc users go away.
1413 */
1414 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1415
1416 __ip_vs_del_service(svc);
1417
1418 write_unlock_bh(&__ip_vs_svc_lock);
1419
1420 return 0;
1421}
1422
1423
1424/*
1425 * Flush all the virtual services
1426 */
1427static int ip_vs_flush(void)
1428{
1429 int idx;
1430 struct ip_vs_service *svc, *nxt;
1431
1432 /*
1433 * Flush the service table hashed by <protocol,addr,port>
1434 */
1435 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1436 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1437 write_lock_bh(&__ip_vs_svc_lock);
1438 ip_vs_svc_unhash(svc);
1439 /*
1440 * Wait until all the svc users go away.
1441 */
1442 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1443 __ip_vs_del_service(svc);
1444 write_unlock_bh(&__ip_vs_svc_lock);
1445 }
1446 }
1447
1448 /*
1449 * Flush the service table hashed by fwmark
1450 */
1451 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1452 list_for_each_entry_safe(svc, nxt,
1453 &ip_vs_svc_fwm_table[idx], f_list) {
1454 write_lock_bh(&__ip_vs_svc_lock);
1455 ip_vs_svc_unhash(svc);
1456 /*
1457 * Wait until all the svc users go away.
1458 */
1459 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1460 __ip_vs_del_service(svc);
1461 write_unlock_bh(&__ip_vs_svc_lock);
1462 }
1463 }
1464
1465 return 0;
1466}
1467
1468
1469/*
1470 * Zero counters in a service or all services
1471 */
1472static int ip_vs_zero_service(struct ip_vs_service *svc)
1473{
1474 struct ip_vs_dest *dest;
1475
1476 write_lock_bh(&__ip_vs_svc_lock);
1477 list_for_each_entry(dest, &svc->destinations, n_list) {
1478 ip_vs_zero_stats(&dest->stats);
1479 }
1480 ip_vs_zero_stats(&svc->stats);
1481 write_unlock_bh(&__ip_vs_svc_lock);
1482 return 0;
1483}
1484
1485static int ip_vs_zero_all(void)
1486{
1487 int idx;
1488 struct ip_vs_service *svc;
1489
1490 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1491 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1492 ip_vs_zero_service(svc);
1493 }
1494 }
1495
1496 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1497 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1498 ip_vs_zero_service(svc);
1499 }
1500 }
1501
1502 ip_vs_zero_stats(&ip_vs_stats);
1503 return 0;
1504}
1505
1506
1507static int
1508proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
1509 void __user *buffer, size_t *lenp, loff_t *ppos)
1510{
1511 int *valp = table->data;
1512 int val = *valp;
1513 int rc;
1514
1515 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1516 if (write && (*valp != val)) {
1517 if ((*valp < 0) || (*valp > 3)) {
1518 /* Restore the correct value */
1519 *valp = val;
1520 } else {
1da177e4 1521 update_defense_level();
1da177e4
LT
1522 }
1523 }
1524 return rc;
1525}
1526
1527
1528static int
1529proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
1530 void __user *buffer, size_t *lenp, loff_t *ppos)
1531{
1532 int *valp = table->data;
1533 int val[2];
1534 int rc;
1535
1536 /* backup the value first */
1537 memcpy(val, valp, sizeof(val));
1538
1539 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1540 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1541 /* Restore the correct value */
1542 memcpy(valp, val, sizeof(val));
1543 }
1544 return rc;
1545}
1546
1547
1548/*
1549 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1550 */
1551
1552static struct ctl_table vs_vars[] = {
1553 {
1da177e4
LT
1554 .procname = "amemthresh",
1555 .data = &sysctl_ip_vs_amemthresh,
1556 .maxlen = sizeof(int),
1557 .mode = 0644,
1558 .proc_handler = &proc_dointvec,
1559 },
1560#ifdef CONFIG_IP_VS_DEBUG
1561 {
1da177e4
LT
1562 .procname = "debug_level",
1563 .data = &sysctl_ip_vs_debug_level,
1564 .maxlen = sizeof(int),
1565 .mode = 0644,
1566 .proc_handler = &proc_dointvec,
1567 },
1568#endif
1569 {
1da177e4
LT
1570 .procname = "am_droprate",
1571 .data = &sysctl_ip_vs_am_droprate,
1572 .maxlen = sizeof(int),
1573 .mode = 0644,
1574 .proc_handler = &proc_dointvec,
1575 },
1576 {
1da177e4
LT
1577 .procname = "drop_entry",
1578 .data = &sysctl_ip_vs_drop_entry,
1579 .maxlen = sizeof(int),
1580 .mode = 0644,
1581 .proc_handler = &proc_do_defense_mode,
1582 },
1583 {
1da177e4
LT
1584 .procname = "drop_packet",
1585 .data = &sysctl_ip_vs_drop_packet,
1586 .maxlen = sizeof(int),
1587 .mode = 0644,
1588 .proc_handler = &proc_do_defense_mode,
1589 },
1590 {
1da177e4
LT
1591 .procname = "secure_tcp",
1592 .data = &sysctl_ip_vs_secure_tcp,
1593 .maxlen = sizeof(int),
1594 .mode = 0644,
1595 .proc_handler = &proc_do_defense_mode,
1596 },
1597#if 0
1598 {
1da177e4
LT
1599 .procname = "timeout_established",
1600 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1601 .maxlen = sizeof(int),
1602 .mode = 0644,
1603 .proc_handler = &proc_dointvec_jiffies,
1604 },
1605 {
1da177e4
LT
1606 .procname = "timeout_synsent",
1607 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1608 .maxlen = sizeof(int),
1609 .mode = 0644,
1610 .proc_handler = &proc_dointvec_jiffies,
1611 },
1612 {
1da177e4
LT
1613 .procname = "timeout_synrecv",
1614 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1615 .maxlen = sizeof(int),
1616 .mode = 0644,
1617 .proc_handler = &proc_dointvec_jiffies,
1618 },
1619 {
1da177e4
LT
1620 .procname = "timeout_finwait",
1621 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1622 .maxlen = sizeof(int),
1623 .mode = 0644,
1624 .proc_handler = &proc_dointvec_jiffies,
1625 },
1626 {
1da177e4
LT
1627 .procname = "timeout_timewait",
1628 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1629 .maxlen = sizeof(int),
1630 .mode = 0644,
1631 .proc_handler = &proc_dointvec_jiffies,
1632 },
1633 {
1da177e4
LT
1634 .procname = "timeout_close",
1635 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1636 .maxlen = sizeof(int),
1637 .mode = 0644,
1638 .proc_handler = &proc_dointvec_jiffies,
1639 },
1640 {
1da177e4
LT
1641 .procname = "timeout_closewait",
1642 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1643 .maxlen = sizeof(int),
1644 .mode = 0644,
1645 .proc_handler = &proc_dointvec_jiffies,
1646 },
1647 {
1da177e4
LT
1648 .procname = "timeout_lastack",
1649 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1650 .maxlen = sizeof(int),
1651 .mode = 0644,
1652 .proc_handler = &proc_dointvec_jiffies,
1653 },
1654 {
1da177e4
LT
1655 .procname = "timeout_listen",
1656 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1657 .maxlen = sizeof(int),
1658 .mode = 0644,
1659 .proc_handler = &proc_dointvec_jiffies,
1660 },
1661 {
1da177e4
LT
1662 .procname = "timeout_synack",
1663 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1664 .maxlen = sizeof(int),
1665 .mode = 0644,
1666 .proc_handler = &proc_dointvec_jiffies,
1667 },
1668 {
1da177e4
LT
1669 .procname = "timeout_udp",
1670 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1671 .maxlen = sizeof(int),
1672 .mode = 0644,
1673 .proc_handler = &proc_dointvec_jiffies,
1674 },
1675 {
1da177e4
LT
1676 .procname = "timeout_icmp",
1677 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1678 .maxlen = sizeof(int),
1679 .mode = 0644,
1680 .proc_handler = &proc_dointvec_jiffies,
1681 },
1682#endif
1683 {
1da177e4
LT
1684 .procname = "cache_bypass",
1685 .data = &sysctl_ip_vs_cache_bypass,
1686 .maxlen = sizeof(int),
1687 .mode = 0644,
1688 .proc_handler = &proc_dointvec,
1689 },
1690 {
1da177e4
LT
1691 .procname = "expire_nodest_conn",
1692 .data = &sysctl_ip_vs_expire_nodest_conn,
1693 .maxlen = sizeof(int),
1694 .mode = 0644,
1695 .proc_handler = &proc_dointvec,
1696 },
1697 {
1da177e4
LT
1698 .procname = "expire_quiescent_template",
1699 .data = &sysctl_ip_vs_expire_quiescent_template,
1700 .maxlen = sizeof(int),
1701 .mode = 0644,
1702 .proc_handler = &proc_dointvec,
1703 },
1704 {
1da177e4
LT
1705 .procname = "sync_threshold",
1706 .data = &sysctl_ip_vs_sync_threshold,
1707 .maxlen = sizeof(sysctl_ip_vs_sync_threshold),
1708 .mode = 0644,
1709 .proc_handler = &proc_do_sync_threshold,
1710 },
1711 {
1da177e4
LT
1712 .procname = "nat_icmp_send",
1713 .data = &sysctl_ip_vs_nat_icmp_send,
1714 .maxlen = sizeof(int),
1715 .mode = 0644,
1716 .proc_handler = &proc_dointvec,
1717 },
1718 { .ctl_name = 0 }
1719};
1720
5587da55 1721const struct ctl_path net_vs_ctl_path[] = {
90754f8e
PE
1722 { .procname = "net", .ctl_name = CTL_NET, },
1723 { .procname = "ipv4", .ctl_name = NET_IPV4, },
1724 { .procname = "vs", },
1725 { }
1da177e4 1726};
90754f8e 1727EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1da177e4
LT
1728
/* NOTE(review): presumably the handle of the registered sysctl table; it is assigned outside this part of the file - confirm */
static struct ctl_table_header *sysctl_header;
1730
1731#ifdef CONFIG_PROC_FS
1732
/* Position of the seq_file walk over the two service hash tables */
struct ip_vs_iter {
	struct list_head	*table;		/* ip_vs_svc_table or ip_vs_svc_fwm_table */
	int			bucket;		/* index of the current bucket in 'table' */
};
1737
1738/*
1739 * Write the contents of the VS rule table to a PROCfs file.
1740 * (It is kept just for backward compatibility)
1741 */
1742static inline const char *ip_vs_fwd_name(unsigned flags)
1743{
1744 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1745 case IP_VS_CONN_F_LOCALNODE:
1746 return "Local";
1747 case IP_VS_CONN_F_TUNNEL:
1748 return "Tunnel";
1749 case IP_VS_CONN_F_DROUTE:
1750 return "Route";
1751 default:
1752 return "Masq";
1753 }
1754}
1755
1756
1757/* Get the Nth entry in the two lists */
1758static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1759{
1760 struct ip_vs_iter *iter = seq->private;
1761 int idx;
1762 struct ip_vs_service *svc;
1763
1764 /* look in hash by protocol */
1765 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1766 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1767 if (pos-- == 0){
1768 iter->table = ip_vs_svc_table;
1769 iter->bucket = idx;
1770 return svc;
1771 }
1772 }
1773 }
1774
1775 /* keep looking in fwmark */
1776 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1777 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1778 if (pos-- == 0) {
1779 iter->table = ip_vs_svc_fwm_table;
1780 iter->bucket = idx;
1781 return svc;
1782 }
1783 }
1784 }
1785
1786 return NULL;
1787}
1788
1789static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1790{
1791
1792 read_lock_bh(&__ip_vs_svc_lock);
1793 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1794}
1795
1796
1797static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1798{
1799 struct list_head *e;
1800 struct ip_vs_iter *iter;
1801 struct ip_vs_service *svc;
1802
1803 ++*pos;
1804 if (v == SEQ_START_TOKEN)
1805 return ip_vs_info_array(seq,0);
1806
1807 svc = v;
1808 iter = seq->private;
1809
1810 if (iter->table == ip_vs_svc_table) {
1811 /* next service in table hashed by protocol */
1812 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1813 return list_entry(e, struct ip_vs_service, s_list);
1814
1815
1816 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1817 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1818 s_list) {
1819 return svc;
1820 }
1821 }
1822
1823 iter->table = ip_vs_svc_fwm_table;
1824 iter->bucket = -1;
1825 goto scan_fwmark;
1826 }
1827
1828 /* next service in hashed by fwmark */
1829 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1830 return list_entry(e, struct ip_vs_service, f_list);
1831
1832 scan_fwmark:
1833 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1834 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1835 f_list)
1836 return svc;
1837 }
1838
1839 return NULL;
1840}
1841
1842static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1843{
1844 read_unlock_bh(&__ip_vs_svc_lock);
1845}
1846
1847
1848static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1849{
1850 if (v == SEQ_START_TOKEN) {
1851 seq_printf(seq,
1852 "IP Virtual Server version %d.%d.%d (size=%d)\n",
1853 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
1854 seq_puts(seq,
1855 "Prot LocalAddress:Port Scheduler Flags\n");
1856 seq_puts(seq,
1857 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1858 } else {
1859 const struct ip_vs_service *svc = v;
1860 const struct ip_vs_iter *iter = seq->private;
1861 const struct ip_vs_dest *dest;
1862
667a5f18
VB
1863 if (iter->table == ip_vs_svc_table) {
1864#ifdef CONFIG_IP_VS_IPV6
1865 if (svc->af == AF_INET6)
1866 seq_printf(seq, "%s [" NIP6_FMT "]:%04X %s ",
1867 ip_vs_proto_name(svc->protocol),
1868 NIP6(svc->addr.in6),
1869 ntohs(svc->port),
1870 svc->scheduler->name);
1871 else
1872#endif
1873 seq_printf(seq, "%s %08X:%04X %s ",
1874 ip_vs_proto_name(svc->protocol),
1875 ntohl(svc->addr.ip),
1876 ntohs(svc->port),
1877 svc->scheduler->name);
1878 } else {
1da177e4
LT
1879 seq_printf(seq, "FWM %08X %s ",
1880 svc->fwmark, svc->scheduler->name);
667a5f18 1881 }
1da177e4
LT
1882
1883 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1884 seq_printf(seq, "persistent %d %08X\n",
1885 svc->timeout,
1886 ntohl(svc->netmask));
1887 else
1888 seq_putc(seq, '\n');
1889
1890 list_for_each_entry(dest, &svc->destinations, n_list) {
667a5f18
VB
1891#ifdef CONFIG_IP_VS_IPV6
1892 if (dest->af == AF_INET6)
1893 seq_printf(seq,
1894 " -> [" NIP6_FMT "]:%04X"
1895 " %-7s %-6d %-10d %-10d\n",
1896 NIP6(dest->addr.in6),
1897 ntohs(dest->port),
1898 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1899 atomic_read(&dest->weight),
1900 atomic_read(&dest->activeconns),
1901 atomic_read(&dest->inactconns));
1902 else
1903#endif
1904 seq_printf(seq,
1905 " -> %08X:%04X "
1906 "%-7s %-6d %-10d %-10d\n",
1907 ntohl(dest->addr.ip),
1908 ntohs(dest->port),
1909 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1910 atomic_read(&dest->weight),
1911 atomic_read(&dest->activeconns),
1912 atomic_read(&dest->inactconns));
1913
1da177e4
LT
1914 }
1915 }
1916 return 0;
1917}
1918
56b3d975 1919static const struct seq_operations ip_vs_info_seq_ops = {
1da177e4
LT
1920 .start = ip_vs_info_seq_start,
1921 .next = ip_vs_info_seq_next,
1922 .stop = ip_vs_info_seq_stop,
1923 .show = ip_vs_info_seq_show,
1924};
1925
1926static int ip_vs_info_open(struct inode *inode, struct file *file)
1927{
cf7732e4
PE
1928 return seq_open_private(file, &ip_vs_info_seq_ops,
1929 sizeof(struct ip_vs_iter));
1da177e4
LT
1930}
1931
9a32144e 1932static const struct file_operations ip_vs_info_fops = {
1da177e4
LT
1933 .owner = THIS_MODULE,
1934 .open = ip_vs_info_open,
1935 .read = seq_read,
1936 .llseek = seq_lseek,
1937 .release = seq_release_private,
1938};
1939
1940#endif
1941
519e49e8
SW
1942struct ip_vs_stats ip_vs_stats = {
1943 .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
1944};
1da177e4
LT
1945
1946#ifdef CONFIG_PROC_FS
1947static int ip_vs_stats_show(struct seq_file *seq, void *v)
1948{
1949
1950/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1951 seq_puts(seq,
1952 " Total Incoming Outgoing Incoming Outgoing\n");
1953 seq_printf(seq,
1954 " Conns Packets Packets Bytes Bytes\n");
1955
1956 spin_lock_bh(&ip_vs_stats.lock);
e9c0ce23
SW
1957 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,
1958 ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,
1959 (unsigned long long) ip_vs_stats.ustats.inbytes,
1960 (unsigned long long) ip_vs_stats.ustats.outbytes);
1da177e4
LT
1961
1962/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1963 seq_puts(seq,
1964 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1965 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
e9c0ce23
SW
1966 ip_vs_stats.ustats.cps,
1967 ip_vs_stats.ustats.inpps,
1968 ip_vs_stats.ustats.outpps,
1969 ip_vs_stats.ustats.inbps,
1970 ip_vs_stats.ustats.outbps);
1da177e4
LT
1971 spin_unlock_bh(&ip_vs_stats.lock);
1972
1973 return 0;
1974}
1975
1976static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1977{
1978 return single_open(file, ip_vs_stats_show, NULL);
1979}
1980
9a32144e 1981static const struct file_operations ip_vs_stats_fops = {
1da177e4
LT
1982 .owner = THIS_MODULE,
1983 .open = ip_vs_stats_seq_open,
1984 .read = seq_read,
1985 .llseek = seq_lseek,
1986 .release = single_release,
1987};
1988
1989#endif
1990
1991/*
1992 * Set timeout values for tcp tcpfin udp in the timeout_table.
1993 */
1994static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
1995{
1996 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
1997 u->tcp_timeout,
1998 u->tcp_fin_timeout,
1999 u->udp_timeout);
2000
2001#ifdef CONFIG_IP_VS_PROTO_TCP
2002 if (u->tcp_timeout) {
2003 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
2004 = u->tcp_timeout * HZ;
2005 }
2006
2007 if (u->tcp_fin_timeout) {
2008 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
2009 = u->tcp_fin_timeout * HZ;
2010 }
2011#endif
2012
2013#ifdef CONFIG_IP_VS_PROTO_UDP
2014 if (u->udp_timeout) {
2015 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
2016 = u->udp_timeout * HZ;
2017 }
2018#endif
2019 return 0;
2020}
2021
2022
/*
 * Index of a set command within set_arglen[], and the argument sizes
 * expected for each kind of set command.  The macro argument is
 * parenthesized so that any expression can be passed safely.
 */
#define SET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
#define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
				 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
/* Size of the on-stack argument buffer used by do_ip_vs_set_ctl() */
#define MAX_ARG_LEN		SVCDEST_ARG_LEN
2030
9b5b5cff 2031static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
1da177e4
LT
2032 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
2033 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
2034 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
2035 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
2036 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
2037 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
2038 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
2039 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
2040 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
2041 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
2042 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
2043};
2044
c860c6b1
JV
2045static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2046 struct ip_vs_service_user *usvc_compat)
2047{
2048 usvc->af = AF_INET;
2049 usvc->protocol = usvc_compat->protocol;
2050 usvc->addr.ip = usvc_compat->addr;
2051 usvc->port = usvc_compat->port;
2052 usvc->fwmark = usvc_compat->fwmark;
2053
2054 /* Deep copy of sched_name is not needed here */
2055 usvc->sched_name = usvc_compat->sched_name;
2056
2057 usvc->flags = usvc_compat->flags;
2058 usvc->timeout = usvc_compat->timeout;
2059 usvc->netmask = usvc_compat->netmask;
2060}
2061
2062static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2063 struct ip_vs_dest_user *udest_compat)
2064{
2065 udest->addr.ip = udest_compat->addr;
2066 udest->port = udest_compat->port;
2067 udest->conn_flags = udest_compat->conn_flags;
2068 udest->weight = udest_compat->weight;
2069 udest->u_threshold = udest_compat->u_threshold;
2070 udest->l_threshold = udest_compat->l_threshold;
2071}
2072
1da177e4
LT
2073static int
2074do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2075{
2076 int ret;
2077 unsigned char arg[MAX_ARG_LEN];
c860c6b1
JV
2078 struct ip_vs_service_user *usvc_compat;
2079 struct ip_vs_service_user_kern usvc;
1da177e4 2080 struct ip_vs_service *svc;
c860c6b1
JV
2081 struct ip_vs_dest_user *udest_compat;
2082 struct ip_vs_dest_user_kern udest;
1da177e4
LT
2083
2084 if (!capable(CAP_NET_ADMIN))
2085 return -EPERM;
2086
2087 if (len != set_arglen[SET_CMDID(cmd)]) {
2088 IP_VS_ERR("set_ctl: len %u != %u\n",
2089 len, set_arglen[SET_CMDID(cmd)]);
2090 return -EINVAL;
2091 }
2092
2093 if (copy_from_user(arg, user, len) != 0)
2094 return -EFAULT;
2095
2096 /* increase the module use count */
2097 ip_vs_use_count_inc();
2098
14cc3e2b 2099 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
1da177e4
LT
2100 ret = -ERESTARTSYS;
2101 goto out_dec;
2102 }
2103
2104 if (cmd == IP_VS_SO_SET_FLUSH) {
2105 /* Flush the virtual service */
2106 ret = ip_vs_flush();
2107 goto out_unlock;
2108 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2109 /* Set timeout values for (tcp tcpfin udp) */
2110 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
2111 goto out_unlock;
2112 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2113 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2114 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
2115 goto out_unlock;
2116 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2117 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2118 ret = stop_sync_thread(dm->state);
2119 goto out_unlock;
2120 }
2121
c860c6b1
JV
2122 usvc_compat = (struct ip_vs_service_user *)arg;
2123 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2124
2125 /* We only use the new structs internally, so copy userspace compat
2126 * structs to extended internal versions */
2127 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2128 ip_vs_copy_udest_compat(&udest, udest_compat);
1da177e4
LT
2129
2130 if (cmd == IP_VS_SO_SET_ZERO) {
2131 /* if no service address is set, zero counters in all */
c860c6b1 2132 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
1da177e4
LT
2133 ret = ip_vs_zero_all();
2134 goto out_unlock;
2135 }
2136 }
2137
2138 /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
c860c6b1 2139 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) {
1da177e4 2140 IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
c860c6b1
JV
2141 usvc.protocol, NIPQUAD(usvc.addr.ip),
2142 ntohs(usvc.port), usvc.sched_name);
1da177e4
LT
2143 ret = -EFAULT;
2144 goto out_unlock;
2145 }
2146
2147 /* Lookup the exact service by <protocol, addr, port> or fwmark */
c860c6b1 2148 if (usvc.fwmark == 0)
b18610de
JV
2149 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
2150 &usvc.addr, usvc.port);
1da177e4 2151 else
b18610de 2152 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
1da177e4
LT
2153
2154 if (cmd != IP_VS_SO_SET_ADD
c860c6b1 2155 && (svc == NULL || svc->protocol != usvc.protocol)) {
1da177e4
LT
2156 ret = -ESRCH;
2157 goto out_unlock;
2158 }
2159
2160 switch (cmd) {
2161 case IP_VS_SO_SET_ADD:
2162 if (svc != NULL)
2163 ret = -EEXIST;
2164 else
c860c6b1 2165 ret = ip_vs_add_service(&usvc, &svc);
1da177e4
LT
2166 break;
2167 case IP_VS_SO_SET_EDIT:
c860c6b1 2168 ret = ip_vs_edit_service(svc, &usvc);
1da177e4
LT
2169 break;
2170 case IP_VS_SO_SET_DEL:
2171 ret = ip_vs_del_service(svc);
2172 if (!ret)
2173 goto out_unlock;
2174 break;
2175 case IP_VS_SO_SET_ZERO:
2176 ret = ip_vs_zero_service(svc);
2177 break;
2178 case IP_VS_SO_SET_ADDDEST:
c860c6b1 2179 ret = ip_vs_add_dest(svc, &udest);
1da177e4
LT
2180 break;
2181 case IP_VS_SO_SET_EDITDEST:
c860c6b1 2182 ret = ip_vs_edit_dest(svc, &udest);
1da177e4
LT
2183 break;
2184 case IP_VS_SO_SET_DELDEST:
c860c6b1 2185 ret = ip_vs_del_dest(svc, &udest);
1da177e4
LT
2186 break;
2187 default:
2188 ret = -EINVAL;
2189 }
2190
2191 if (svc)
2192 ip_vs_service_put(svc);
2193
2194 out_unlock:
14cc3e2b 2195 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2196 out_dec:
2197 /* decrease the module use count */
2198 ip_vs_use_count_dec();
2199
2200 return ret;
2201}
2202
2203
2204static void
2205ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2206{
2207 spin_lock_bh(&src->lock);
e9c0ce23 2208 memcpy(dst, &src->ustats, sizeof(*dst));
1da177e4
LT
2209 spin_unlock_bh(&src->lock);
2210}
2211
2212static void
2213ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2214{
2215 dst->protocol = src->protocol;
e7ade46a 2216 dst->addr = src->addr.ip;
1da177e4
LT
2217 dst->port = src->port;
2218 dst->fwmark = src->fwmark;
4da62fc7 2219 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
1da177e4
LT
2220 dst->flags = src->flags;
2221 dst->timeout = src->timeout / HZ;
2222 dst->netmask = src->netmask;
2223 dst->num_dests = src->num_dests;
2224 ip_vs_copy_stats(&dst->stats, &src->stats);
2225}
2226
2227static inline int
2228__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2229 struct ip_vs_get_services __user *uptr)
2230{
2231 int idx, count=0;
2232 struct ip_vs_service *svc;
2233 struct ip_vs_service_entry entry;
2234 int ret = 0;
2235
2236 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2237 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
f94fd041
JV
2238 /* Only expose IPv4 entries to old interface */
2239 if (svc->af != AF_INET)
2240 continue;
2241
1da177e4
LT
2242 if (count >= get->num_services)
2243 goto out;
4da62fc7 2244 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2245 ip_vs_copy_service(&entry, svc);
2246 if (copy_to_user(&uptr->entrytable[count],
2247 &entry, sizeof(entry))) {
2248 ret = -EFAULT;
2249 goto out;
2250 }
2251 count++;
2252 }
2253 }
2254
2255 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2256 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
f94fd041
JV
2257 /* Only expose IPv4 entries to old interface */
2258 if (svc->af != AF_INET)
2259 continue;
2260
1da177e4
LT
2261 if (count >= get->num_services)
2262 goto out;
4da62fc7 2263 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2264 ip_vs_copy_service(&entry, svc);
2265 if (copy_to_user(&uptr->entrytable[count],
2266 &entry, sizeof(entry))) {
2267 ret = -EFAULT;
2268 goto out;
2269 }
2270 count++;
2271 }
2272 }
2273 out:
2274 return ret;
2275}
2276
2277static inline int
2278__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2279 struct ip_vs_get_dests __user *uptr)
2280{
2281 struct ip_vs_service *svc;
b18610de 2282 union nf_inet_addr addr = { .ip = get->addr };
1da177e4
LT
2283 int ret = 0;
2284
2285 if (get->fwmark)
b18610de 2286 svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark);
1da177e4 2287 else
b18610de
JV
2288 svc = __ip_vs_service_get(AF_INET, get->protocol, &addr,
2289 get->port);
2290
1da177e4
LT
2291 if (svc) {
2292 int count = 0;
2293 struct ip_vs_dest *dest;
2294 struct ip_vs_dest_entry entry;
2295
2296 list_for_each_entry(dest, &svc->destinations, n_list) {
2297 if (count >= get->num_dests)
2298 break;
2299
e7ade46a 2300 entry.addr = dest->addr.ip;
1da177e4
LT
2301 entry.port = dest->port;
2302 entry.conn_flags = atomic_read(&dest->conn_flags);
2303 entry.weight = atomic_read(&dest->weight);
2304 entry.u_threshold = dest->u_threshold;
2305 entry.l_threshold = dest->l_threshold;
2306 entry.activeconns = atomic_read(&dest->activeconns);
2307 entry.inactconns = atomic_read(&dest->inactconns);
2308 entry.persistconns = atomic_read(&dest->persistconns);
2309 ip_vs_copy_stats(&entry.stats, &dest->stats);
2310 if (copy_to_user(&uptr->entrytable[count],
2311 &entry, sizeof(entry))) {
2312 ret = -EFAULT;
2313 break;
2314 }
2315 count++;
2316 }
2317 ip_vs_service_put(svc);
2318 } else
2319 ret = -ESRCH;
2320 return ret;
2321}
2322
2323static inline void
2324__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2325{
2326#ifdef CONFIG_IP_VS_PROTO_TCP
2327 u->tcp_timeout =
2328 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2329 u->tcp_fin_timeout =
2330 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2331#endif
2332#ifdef CONFIG_IP_VS_PROTO_UDP
2333 u->udp_timeout =
2334 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2335#endif
2336}
2337
2338
2339#define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2340#define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2341#define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2342#define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2343#define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2344#define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2345#define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2346
9b5b5cff 2347static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
1da177e4
LT
2348 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2349 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2350 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2351 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2352 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2353 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2354 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2355};
2356
2357static int
2358do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2359{
2360 unsigned char arg[128];
2361 int ret = 0;
2362
2363 if (!capable(CAP_NET_ADMIN))
2364 return -EPERM;
2365
2366 if (*len < get_arglen[GET_CMDID(cmd)]) {
2367 IP_VS_ERR("get_ctl: len %u < %u\n",
2368 *len, get_arglen[GET_CMDID(cmd)]);
2369 return -EINVAL;
2370 }
2371
2372 if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0)
2373 return -EFAULT;
2374
14cc3e2b 2375 if (mutex_lock_interruptible(&__ip_vs_mutex))
1da177e4
LT
2376 return -ERESTARTSYS;
2377
2378 switch (cmd) {
2379 case IP_VS_SO_GET_VERSION:
2380 {
2381 char buf[64];
2382
2383 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2384 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
2385 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2386 ret = -EFAULT;
2387 goto out;
2388 }
2389 *len = strlen(buf)+1;
2390 }
2391 break;
2392
2393 case IP_VS_SO_GET_INFO:
2394 {
2395 struct ip_vs_getinfo info;
2396 info.version = IP_VS_VERSION_CODE;
2397 info.size = IP_VS_CONN_TAB_SIZE;
2398 info.num_services = ip_vs_num_services;
2399 if (copy_to_user(user, &info, sizeof(info)) != 0)
2400 ret = -EFAULT;
2401 }
2402 break;
2403
2404 case IP_VS_SO_GET_SERVICES:
2405 {
2406 struct ip_vs_get_services *get;
2407 int size;
2408
2409 get = (struct ip_vs_get_services *)arg;
2410 size = sizeof(*get) +
2411 sizeof(struct ip_vs_service_entry) * get->num_services;
2412 if (*len != size) {
2413 IP_VS_ERR("length: %u != %u\n", *len, size);
2414 ret = -EINVAL;
2415 goto out;
2416 }
2417 ret = __ip_vs_get_service_entries(get, user);
2418 }
2419 break;
2420
2421 case IP_VS_SO_GET_SERVICE:
2422 {
2423 struct ip_vs_service_entry *entry;
2424 struct ip_vs_service *svc;
b18610de 2425 union nf_inet_addr addr;
1da177e4
LT
2426
2427 entry = (struct ip_vs_service_entry *)arg;
b18610de 2428 addr.ip = entry->addr;
1da177e4 2429 if (entry->fwmark)
b18610de 2430 svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark);
1da177e4 2431 else
b18610de
JV
2432 svc = __ip_vs_service_get(AF_INET, entry->protocol,
2433 &addr, entry->port);
1da177e4
LT
2434 if (svc) {
2435 ip_vs_copy_service(entry, svc);
2436 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2437 ret = -EFAULT;
2438 ip_vs_service_put(svc);
2439 } else
2440 ret = -ESRCH;
2441 }
2442 break;
2443
2444 case IP_VS_SO_GET_DESTS:
2445 {
2446 struct ip_vs_get_dests *get;
2447 int size;
2448
2449 get = (struct ip_vs_get_dests *)arg;
2450 size = sizeof(*get) +
2451 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2452 if (*len != size) {
2453 IP_VS_ERR("length: %u != %u\n", *len, size);
2454 ret = -EINVAL;
2455 goto out;
2456 }
2457 ret = __ip_vs_get_dest_entries(get, user);
2458 }
2459 break;
2460
2461 case IP_VS_SO_GET_TIMEOUT:
2462 {
2463 struct ip_vs_timeout_user t;
2464
2465 __ip_vs_get_timeouts(&t);
2466 if (copy_to_user(user, &t, sizeof(t)) != 0)
2467 ret = -EFAULT;
2468 }
2469 break;
2470
2471 case IP_VS_SO_GET_DAEMON:
2472 {
2473 struct ip_vs_daemon_user d[2];
2474
2475 memset(&d, 0, sizeof(d));
2476 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2477 d[0].state = IP_VS_STATE_MASTER;
4da62fc7 2478 strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
1da177e4
LT
2479 d[0].syncid = ip_vs_master_syncid;
2480 }
2481 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2482 d[1].state = IP_VS_STATE_BACKUP;
4da62fc7 2483 strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
1da177e4
LT
2484 d[1].syncid = ip_vs_backup_syncid;
2485 }
2486 if (copy_to_user(user, &d, sizeof(d)) != 0)
2487 ret = -EFAULT;
2488 }
2489 break;
2490
2491 default:
2492 ret = -EINVAL;
2493 }
2494
2495 out:
14cc3e2b 2496 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2497 return ret;
2498}
2499
2500
2501static struct nf_sockopt_ops ip_vs_sockopts = {
2502 .pf = PF_INET,
2503 .set_optmin = IP_VS_BASE_CTL,
2504 .set_optmax = IP_VS_SO_SET_MAX+1,
2505 .set = do_ip_vs_set_ctl,
2506 .get_optmin = IP_VS_BASE_CTL,
2507 .get_optmax = IP_VS_SO_GET_MAX+1,
2508 .get = do_ip_vs_get_ctl,
16fcec35 2509 .owner = THIS_MODULE,
1da177e4
LT
2510};
2511
9a812198
JV
2512/*
2513 * Generic Netlink interface
2514 */
2515
2516/* IPVS genetlink family */
2517static struct genl_family ip_vs_genl_family = {
2518 .id = GENL_ID_GENERATE,
2519 .hdrsize = 0,
2520 .name = IPVS_GENL_NAME,
2521 .version = IPVS_GENL_VERSION,
2522 .maxattr = IPVS_CMD_MAX,
2523};
2524
2525/* Policy used for first-level command attributes */
2526static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2527 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2528 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2529 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2530 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2531 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2532 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2533};
2534
2535/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2536static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2537 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2538 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2539 .len = IP_VS_IFNAME_MAXLEN },
2540 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2541};
2542
2543/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2544static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2545 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2546 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2547 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2548 .len = sizeof(union nf_inet_addr) },
2549 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2550 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2551 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2552 .len = IP_VS_SCHEDNAME_MAXLEN },
2553 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2554 .len = sizeof(struct ip_vs_flags) },
2555 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2556 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2557 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2558};
2559
2560/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2561static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2562 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2563 .len = sizeof(union nf_inet_addr) },
2564 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2565 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2566 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2567 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2568 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2569 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2570 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2571 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2572 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
2573};
2574
2575static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2576 struct ip_vs_stats *stats)
2577{
2578 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2579 if (!nl_stats)
2580 return -EMSGSIZE;
2581
2582 spin_lock_bh(&stats->lock);
2583
e9c0ce23
SW
2584 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
2585 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
2586 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
2587 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
2588 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
2589 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
2590 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
2591 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
2592 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
2593 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
9a812198
JV
2594
2595 spin_unlock_bh(&stats->lock);
2596
2597 nla_nest_end(skb, nl_stats);
2598
2599 return 0;
2600
2601nla_put_failure:
2602 spin_unlock_bh(&stats->lock);
2603 nla_nest_cancel(skb, nl_stats);
2604 return -EMSGSIZE;
2605}
2606
2607static int ip_vs_genl_fill_service(struct sk_buff *skb,
2608 struct ip_vs_service *svc)
2609{
2610 struct nlattr *nl_service;
2611 struct ip_vs_flags flags = { .flags = svc->flags,
2612 .mask = ~0 };
2613
2614 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2615 if (!nl_service)
2616 return -EMSGSIZE;
2617
f94fd041 2618 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
9a812198
JV
2619
2620 if (svc->fwmark) {
2621 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2622 } else {
2623 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2624 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2625 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2626 }
2627
2628 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2629 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2630 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2631 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2632
2633 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2634 goto nla_put_failure;
2635
2636 nla_nest_end(skb, nl_service);
2637
2638 return 0;
2639
2640nla_put_failure:
2641 nla_nest_cancel(skb, nl_service);
2642 return -EMSGSIZE;
2643}
2644
2645static int ip_vs_genl_dump_service(struct sk_buff *skb,
2646 struct ip_vs_service *svc,
2647 struct netlink_callback *cb)
2648{
2649 void *hdr;
2650
2651 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2652 &ip_vs_genl_family, NLM_F_MULTI,
2653 IPVS_CMD_NEW_SERVICE);
2654 if (!hdr)
2655 return -EMSGSIZE;
2656
2657 if (ip_vs_genl_fill_service(skb, svc) < 0)
2658 goto nla_put_failure;
2659
2660 return genlmsg_end(skb, hdr);
2661
2662nla_put_failure:
2663 genlmsg_cancel(skb, hdr);
2664 return -EMSGSIZE;
2665}
2666
2667static int ip_vs_genl_dump_services(struct sk_buff *skb,
2668 struct netlink_callback *cb)
2669{
2670 int idx = 0, i;
2671 int start = cb->args[0];
2672 struct ip_vs_service *svc;
2673
2674 mutex_lock(&__ip_vs_mutex);
2675 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2676 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2677 if (++idx <= start)
2678 continue;
2679 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2680 idx--;
2681 goto nla_put_failure;
2682 }
2683 }
2684 }
2685
2686 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2687 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2688 if (++idx <= start)
2689 continue;
2690 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2691 idx--;
2692 goto nla_put_failure;
2693 }
2694 }
2695 }
2696
2697nla_put_failure:
2698 mutex_unlock(&__ip_vs_mutex);
2699 cb->args[0] = idx;
2700
2701 return skb->len;
2702}
2703
c860c6b1 2704static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
9a812198
JV
2705 struct nlattr *nla, int full_entry)
2706{
2707 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2708 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2709
2710 /* Parse mandatory identifying service fields first */
2711 if (nla == NULL ||
2712 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2713 return -EINVAL;
2714
2715 nla_af = attrs[IPVS_SVC_ATTR_AF];
2716 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2717 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2718 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2719 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2720
2721 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2722 return -EINVAL;
2723
c860c6b1 2724 usvc->af = nla_get_u16(nla_af);
f94fd041
JV
2725#ifdef CONFIG_IP_VS_IPV6
2726 if (usvc->af != AF_INET && usvc->af != AF_INET6)
2727#else
2728 if (usvc->af != AF_INET)
2729#endif
9a812198
JV
2730 return -EAFNOSUPPORT;
2731
2732 if (nla_fwmark) {
2733 usvc->protocol = IPPROTO_TCP;
2734 usvc->fwmark = nla_get_u32(nla_fwmark);
2735 } else {
2736 usvc->protocol = nla_get_u16(nla_protocol);
2737 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2738 usvc->port = nla_get_u16(nla_port);
2739 usvc->fwmark = 0;
2740 }
2741
2742 /* If a full entry was requested, check for the additional fields */
2743 if (full_entry) {
2744 struct nlattr *nla_sched, *nla_flags, *nla_timeout,
2745 *nla_netmask;
2746 struct ip_vs_flags flags;
2747 struct ip_vs_service *svc;
2748
2749 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2750 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2751 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2752 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2753
2754 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2755 return -EINVAL;
2756
2757 nla_memcpy(&flags, nla_flags, sizeof(flags));
2758
2759 /* prefill flags from service if it already exists */
2760 if (usvc->fwmark)
b18610de 2761 svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark);
9a812198 2762 else
b18610de
JV
2763 svc = __ip_vs_service_get(usvc->af, usvc->protocol,
2764 &usvc->addr, usvc->port);
9a812198
JV
2765 if (svc) {
2766 usvc->flags = svc->flags;
2767 ip_vs_service_put(svc);
2768 } else
2769 usvc->flags = 0;
2770
2771 /* set new flags from userland */
2772 usvc->flags = (usvc->flags & ~flags.mask) |
2773 (flags.flags & flags.mask);
c860c6b1 2774 usvc->sched_name = nla_data(nla_sched);
9a812198
JV
2775 usvc->timeout = nla_get_u32(nla_timeout);
2776 usvc->netmask = nla_get_u32(nla_netmask);
2777 }
2778
2779 return 0;
2780}
2781
2782static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2783{
c860c6b1 2784 struct ip_vs_service_user_kern usvc;
9a812198
JV
2785 int ret;
2786
2787 ret = ip_vs_genl_parse_service(&usvc, nla, 0);
2788 if (ret)
2789 return ERR_PTR(ret);
2790
2791 if (usvc.fwmark)
b18610de 2792 return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
9a812198 2793 else
b18610de
JV
2794 return __ip_vs_service_get(usvc.af, usvc.protocol,
2795 &usvc.addr, usvc.port);
9a812198
JV
2796}
2797
2798static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2799{
2800 struct nlattr *nl_dest;
2801
2802 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2803 if (!nl_dest)
2804 return -EMSGSIZE;
2805
2806 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2807 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2808
2809 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2810 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2811 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2812 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2813 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2814 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2815 atomic_read(&dest->activeconns));
2816 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2817 atomic_read(&dest->inactconns));
2818 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2819 atomic_read(&dest->persistconns));
2820
2821 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2822 goto nla_put_failure;
2823
2824 nla_nest_end(skb, nl_dest);
2825
2826 return 0;
2827
2828nla_put_failure:
2829 nla_nest_cancel(skb, nl_dest);
2830 return -EMSGSIZE;
2831}
2832
2833static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2834 struct netlink_callback *cb)
2835{
2836 void *hdr;
2837
2838 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2839 &ip_vs_genl_family, NLM_F_MULTI,
2840 IPVS_CMD_NEW_DEST);
2841 if (!hdr)
2842 return -EMSGSIZE;
2843
2844 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2845 goto nla_put_failure;
2846
2847 return genlmsg_end(skb, hdr);
2848
2849nla_put_failure:
2850 genlmsg_cancel(skb, hdr);
2851 return -EMSGSIZE;
2852}
2853
2854static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2855 struct netlink_callback *cb)
2856{
2857 int idx = 0;
2858 int start = cb->args[0];
2859 struct ip_vs_service *svc;
2860 struct ip_vs_dest *dest;
2861 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2862
2863 mutex_lock(&__ip_vs_mutex);
2864
2865 /* Try to find the service for which to dump destinations */
2866 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2867 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2868 goto out_err;
2869
2870 svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
2871 if (IS_ERR(svc) || svc == NULL)
2872 goto out_err;
2873
2874 /* Dump the destinations */
2875 list_for_each_entry(dest, &svc->destinations, n_list) {
2876 if (++idx <= start)
2877 continue;
2878 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2879 idx--;
2880 goto nla_put_failure;
2881 }
2882 }
2883
2884nla_put_failure:
2885 cb->args[0] = idx;
2886 ip_vs_service_put(svc);
2887
2888out_err:
2889 mutex_unlock(&__ip_vs_mutex);
2890
2891 return skb->len;
2892}
2893
c860c6b1 2894static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
9a812198
JV
2895 struct nlattr *nla, int full_entry)
2896{
2897 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2898 struct nlattr *nla_addr, *nla_port;
2899
2900 /* Parse mandatory identifying destination fields first */
2901 if (nla == NULL ||
2902 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2903 return -EINVAL;
2904
2905 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
2906 nla_port = attrs[IPVS_DEST_ATTR_PORT];
2907
2908 if (!(nla_addr && nla_port))
2909 return -EINVAL;
2910
2911 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2912 udest->port = nla_get_u16(nla_port);
2913
2914 /* If a full entry was requested, check for the additional fields */
2915 if (full_entry) {
2916 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2917 *nla_l_thresh;
2918
2919 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
2920 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
2921 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
2922 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
2923
2924 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
2925 return -EINVAL;
2926
2927 udest->conn_flags = nla_get_u32(nla_fwd)
2928 & IP_VS_CONN_F_FWD_MASK;
2929 udest->weight = nla_get_u32(nla_weight);
2930 udest->u_threshold = nla_get_u32(nla_u_thresh);
2931 udest->l_threshold = nla_get_u32(nla_l_thresh);
2932 }
2933
2934 return 0;
2935}
2936
2937static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
2938 const char *mcast_ifn, __be32 syncid)
2939{
2940 struct nlattr *nl_daemon;
2941
2942 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
2943 if (!nl_daemon)
2944 return -EMSGSIZE;
2945
2946 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
2947 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
2948 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
2949
2950 nla_nest_end(skb, nl_daemon);
2951
2952 return 0;
2953
2954nla_put_failure:
2955 nla_nest_cancel(skb, nl_daemon);
2956 return -EMSGSIZE;
2957}
2958
2959static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
2960 const char *mcast_ifn, __be32 syncid,
2961 struct netlink_callback *cb)
2962{
2963 void *hdr;
2964 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2965 &ip_vs_genl_family, NLM_F_MULTI,
2966 IPVS_CMD_NEW_DAEMON);
2967 if (!hdr)
2968 return -EMSGSIZE;
2969
2970 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
2971 goto nla_put_failure;
2972
2973 return genlmsg_end(skb, hdr);
2974
2975nla_put_failure:
2976 genlmsg_cancel(skb, hdr);
2977 return -EMSGSIZE;
2978}
2979
2980static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
2981 struct netlink_callback *cb)
2982{
2983 mutex_lock(&__ip_vs_mutex);
2984 if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
2985 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
2986 ip_vs_master_mcast_ifn,
2987 ip_vs_master_syncid, cb) < 0)
2988 goto nla_put_failure;
2989
2990 cb->args[0] = 1;
2991 }
2992
2993 if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
2994 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
2995 ip_vs_backup_mcast_ifn,
2996 ip_vs_backup_syncid, cb) < 0)
2997 goto nla_put_failure;
2998
2999 cb->args[1] = 1;
3000 }
3001
3002nla_put_failure:
3003 mutex_unlock(&__ip_vs_mutex);
3004
3005 return skb->len;
3006}
3007
3008static int ip_vs_genl_new_daemon(struct nlattr **attrs)
3009{
3010 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3011 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3012 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3013 return -EINVAL;
3014
3015 return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3016 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3017 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3018}
3019
3020static int ip_vs_genl_del_daemon(struct nlattr **attrs)
3021{
3022 if (!attrs[IPVS_DAEMON_ATTR_STATE])
3023 return -EINVAL;
3024
3025 return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3026}
3027
3028static int ip_vs_genl_set_config(struct nlattr **attrs)
3029{
3030 struct ip_vs_timeout_user t;
3031
3032 __ip_vs_get_timeouts(&t);
3033
3034 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3035 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3036
3037 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3038 t.tcp_fin_timeout =
3039 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3040
3041 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3042 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3043
3044 return ip_vs_set_timeout(&t);
3045}
3046
3047static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3048{
3049 struct ip_vs_service *svc = NULL;
c860c6b1
JV
3050 struct ip_vs_service_user_kern usvc;
3051 struct ip_vs_dest_user_kern udest;
9a812198
JV
3052 int ret = 0, cmd;
3053 int need_full_svc = 0, need_full_dest = 0;
3054
3055 cmd = info->genlhdr->cmd;
3056
3057 mutex_lock(&__ip_vs_mutex);
3058
3059 if (cmd == IPVS_CMD_FLUSH) {
3060 ret = ip_vs_flush();
3061 goto out;
3062 } else if (cmd == IPVS_CMD_SET_CONFIG) {
3063 ret = ip_vs_genl_set_config(info->attrs);
3064 goto out;
3065 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3066 cmd == IPVS_CMD_DEL_DAEMON) {
3067
3068 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3069
3070 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3071 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3072 info->attrs[IPVS_CMD_ATTR_DAEMON],
3073 ip_vs_daemon_policy)) {
3074 ret = -EINVAL;
3075 goto out;
3076 }
3077
3078 if (cmd == IPVS_CMD_NEW_DAEMON)
3079 ret = ip_vs_genl_new_daemon(daemon_attrs);
3080 else
3081 ret = ip_vs_genl_del_daemon(daemon_attrs);
3082 goto out;
3083 } else if (cmd == IPVS_CMD_ZERO &&
3084 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3085 ret = ip_vs_zero_all();
3086 goto out;
3087 }
3088
3089 /* All following commands require a service argument, so check if we
3090 * received a valid one. We need a full service specification when
3091 * adding / editing a service. Only identifying members otherwise. */
3092 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3093 need_full_svc = 1;
3094
3095 ret = ip_vs_genl_parse_service(&usvc,
3096 info->attrs[IPVS_CMD_ATTR_SERVICE],
3097 need_full_svc);
3098 if (ret)
3099 goto out;
3100
3101 /* Lookup the exact service by <protocol, addr, port> or fwmark */
3102 if (usvc.fwmark == 0)
b18610de
JV
3103 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
3104 &usvc.addr, usvc.port);
9a812198 3105 else
b18610de 3106 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
9a812198
JV
3107
3108 /* Unless we're adding a new service, the service must already exist */
3109 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3110 ret = -ESRCH;
3111 goto out;
3112 }
3113
3114 /* Destination commands require a valid destination argument. For
3115 * adding / editing a destination, we need a full destination
3116 * specification. */
3117 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3118 cmd == IPVS_CMD_DEL_DEST) {
3119 if (cmd != IPVS_CMD_DEL_DEST)
3120 need_full_dest = 1;
3121
3122 ret = ip_vs_genl_parse_dest(&udest,
3123 info->attrs[IPVS_CMD_ATTR_DEST],
3124 need_full_dest);
3125 if (ret)
3126 goto out;
3127 }
3128
3129 switch (cmd) {
3130 case IPVS_CMD_NEW_SERVICE:
3131 if (svc == NULL)
3132 ret = ip_vs_add_service(&usvc, &svc);
3133 else
3134 ret = -EEXIST;
3135 break;
3136 case IPVS_CMD_SET_SERVICE:
3137 ret = ip_vs_edit_service(svc, &usvc);
3138 break;
3139 case IPVS_CMD_DEL_SERVICE:
3140 ret = ip_vs_del_service(svc);
3141 break;
3142 case IPVS_CMD_NEW_DEST:
3143 ret = ip_vs_add_dest(svc, &udest);
3144 break;
3145 case IPVS_CMD_SET_DEST:
3146 ret = ip_vs_edit_dest(svc, &udest);
3147 break;
3148 case IPVS_CMD_DEL_DEST:
3149 ret = ip_vs_del_dest(svc, &udest);
3150 break;
3151 case IPVS_CMD_ZERO:
3152 ret = ip_vs_zero_service(svc);
3153 break;
3154 default:
3155 ret = -EINVAL;
3156 }
3157
3158out:
3159 if (svc)
3160 ip_vs_service_put(svc);
3161 mutex_unlock(&__ip_vs_mutex);
3162
3163 return ret;
3164}
3165
3166static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3167{
3168 struct sk_buff *msg;
3169 void *reply;
3170 int ret, cmd, reply_cmd;
3171
3172 cmd = info->genlhdr->cmd;
3173
3174 if (cmd == IPVS_CMD_GET_SERVICE)
3175 reply_cmd = IPVS_CMD_NEW_SERVICE;
3176 else if (cmd == IPVS_CMD_GET_INFO)
3177 reply_cmd = IPVS_CMD_SET_INFO;
3178 else if (cmd == IPVS_CMD_GET_CONFIG)
3179 reply_cmd = IPVS_CMD_SET_CONFIG;
3180 else {
3181 IP_VS_ERR("unknown Generic Netlink command\n");
3182 return -EINVAL;
3183 }
3184
3185 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3186 if (!msg)
3187 return -ENOMEM;
3188
3189 mutex_lock(&__ip_vs_mutex);
3190
3191 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3192 if (reply == NULL)
3193 goto nla_put_failure;
3194
3195 switch (cmd) {
3196 case IPVS_CMD_GET_SERVICE:
3197 {
3198 struct ip_vs_service *svc;
3199
3200 svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
3201 if (IS_ERR(svc)) {
3202 ret = PTR_ERR(svc);
3203 goto out_err;
3204 } else if (svc) {
3205 ret = ip_vs_genl_fill_service(msg, svc);
3206 ip_vs_service_put(svc);
3207 if (ret)
3208 goto nla_put_failure;
3209 } else {
3210 ret = -ESRCH;
3211 goto out_err;
3212 }
3213
3214 break;
3215 }
3216
3217 case IPVS_CMD_GET_CONFIG:
3218 {
3219 struct ip_vs_timeout_user t;
3220
3221 __ip_vs_get_timeouts(&t);
3222#ifdef CONFIG_IP_VS_PROTO_TCP
3223 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3224 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3225 t.tcp_fin_timeout);
3226#endif
3227#ifdef CONFIG_IP_VS_PROTO_UDP
3228 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3229#endif
3230
3231 break;
3232 }
3233
3234 case IPVS_CMD_GET_INFO:
3235 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3236 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3237 IP_VS_CONN_TAB_SIZE);
3238 break;
3239 }
3240
3241 genlmsg_end(msg, reply);
3242 ret = genlmsg_unicast(msg, info->snd_pid);
3243 goto out;
3244
3245nla_put_failure:
3246 IP_VS_ERR("not enough space in Netlink message\n");
3247 ret = -EMSGSIZE;
3248
3249out_err:
3250 nlmsg_free(msg);
3251out:
3252 mutex_unlock(&__ip_vs_mutex);
3253
3254 return ret;
3255}
3256
3257
3258static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3259 {
3260 .cmd = IPVS_CMD_NEW_SERVICE,
3261 .flags = GENL_ADMIN_PERM,
3262 .policy = ip_vs_cmd_policy,
3263 .doit = ip_vs_genl_set_cmd,
3264 },
3265 {
3266 .cmd = IPVS_CMD_SET_SERVICE,
3267 .flags = GENL_ADMIN_PERM,
3268 .policy = ip_vs_cmd_policy,
3269 .doit = ip_vs_genl_set_cmd,
3270 },
3271 {
3272 .cmd = IPVS_CMD_DEL_SERVICE,
3273 .flags = GENL_ADMIN_PERM,
3274 .policy = ip_vs_cmd_policy,
3275 .doit = ip_vs_genl_set_cmd,
3276 },
3277 {
3278 .cmd = IPVS_CMD_GET_SERVICE,
3279 .flags = GENL_ADMIN_PERM,
3280 .doit = ip_vs_genl_get_cmd,
3281 .dumpit = ip_vs_genl_dump_services,
3282 .policy = ip_vs_cmd_policy,
3283 },
3284 {
3285 .cmd = IPVS_CMD_NEW_DEST,
3286 .flags = GENL_ADMIN_PERM,
3287 .policy = ip_vs_cmd_policy,
3288 .doit = ip_vs_genl_set_cmd,
3289 },
3290 {
3291 .cmd = IPVS_CMD_SET_DEST,
3292 .flags = GENL_ADMIN_PERM,
3293 .policy = ip_vs_cmd_policy,
3294 .doit = ip_vs_genl_set_cmd,
3295 },
3296 {
3297 .cmd = IPVS_CMD_DEL_DEST,
3298 .flags = GENL_ADMIN_PERM,
3299 .policy = ip_vs_cmd_policy,
3300 .doit = ip_vs_genl_set_cmd,
3301 },
3302 {
3303 .cmd = IPVS_CMD_GET_DEST,
3304 .flags = GENL_ADMIN_PERM,
3305 .policy = ip_vs_cmd_policy,
3306 .dumpit = ip_vs_genl_dump_dests,
3307 },
3308 {
3309 .cmd = IPVS_CMD_NEW_DAEMON,
3310 .flags = GENL_ADMIN_PERM,
3311 .policy = ip_vs_cmd_policy,
3312 .doit = ip_vs_genl_set_cmd,
3313 },
3314 {
3315 .cmd = IPVS_CMD_DEL_DAEMON,
3316 .flags = GENL_ADMIN_PERM,
3317 .policy = ip_vs_cmd_policy,
3318 .doit = ip_vs_genl_set_cmd,
3319 },
3320 {
3321 .cmd = IPVS_CMD_GET_DAEMON,
3322 .flags = GENL_ADMIN_PERM,
3323 .dumpit = ip_vs_genl_dump_daemons,
3324 },
3325 {
3326 .cmd = IPVS_CMD_SET_CONFIG,
3327 .flags = GENL_ADMIN_PERM,
3328 .policy = ip_vs_cmd_policy,
3329 .doit = ip_vs_genl_set_cmd,
3330 },
3331 {
3332 .cmd = IPVS_CMD_GET_CONFIG,
3333 .flags = GENL_ADMIN_PERM,
3334 .doit = ip_vs_genl_get_cmd,
3335 },
3336 {
3337 .cmd = IPVS_CMD_GET_INFO,
3338 .flags = GENL_ADMIN_PERM,
3339 .doit = ip_vs_genl_get_cmd,
3340 },
3341 {
3342 .cmd = IPVS_CMD_ZERO,
3343 .flags = GENL_ADMIN_PERM,
3344 .policy = ip_vs_cmd_policy,
3345 .doit = ip_vs_genl_set_cmd,
3346 },
3347 {
3348 .cmd = IPVS_CMD_FLUSH,
3349 .flags = GENL_ADMIN_PERM,
3350 .doit = ip_vs_genl_set_cmd,
3351 },
3352};
3353
3354static int __init ip_vs_genl_register(void)
3355{
3356 int ret, i;
3357
3358 ret = genl_register_family(&ip_vs_genl_family);
3359 if (ret)
3360 return ret;
3361
3362 for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
3363 ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
3364 if (ret)
3365 goto err_out;
3366 }
3367 return 0;
3368
3369err_out:
3370 genl_unregister_family(&ip_vs_genl_family);
3371 return ret;
3372}
3373
3374static void ip_vs_genl_unregister(void)
3375{
3376 genl_unregister_family(&ip_vs_genl_family);
3377}
3378
3379/* End of Generic Netlink interface definitions */
3380
1da177e4 3381
048cf48b 3382int __init ip_vs_control_init(void)
1da177e4
LT
3383{
3384 int ret;
3385 int idx;
3386
3387 EnterFunction(2);
3388
3389 ret = nf_register_sockopt(&ip_vs_sockopts);
3390 if (ret) {
3391 IP_VS_ERR("cannot register sockopt.\n");
3392 return ret;
3393 }
3394
9a812198
JV
3395 ret = ip_vs_genl_register();
3396 if (ret) {
3397 IP_VS_ERR("cannot register Generic Netlink interface.\n");
3398 nf_unregister_sockopt(&ip_vs_sockopts);
3399 return ret;
3400 }
3401
457c4cbc
EB
3402 proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
3403 proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
1da177e4 3404
90754f8e 3405 sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
1da177e4
LT
3406
3407 /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3408 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3409 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3410 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3411 }
3412 for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) {
3413 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3414 }
3415
1da177e4
LT
3416 ip_vs_new_estimator(&ip_vs_stats);
3417
3418 /* Hook the defense timer */
3419 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3420
3421 LeaveFunction(2);
3422 return 0;
3423}
3424
3425
3426void ip_vs_control_cleanup(void)
3427{
3428 EnterFunction(2);
3429 ip_vs_trash_cleanup();
3430 cancel_rearming_delayed_work(&defense_work);
28e53bdd 3431 cancel_work_sync(&defense_work.work);
1da177e4
LT
3432 ip_vs_kill_estimator(&ip_vs_stats);
3433 unregister_sysctl_table(sysctl_header);
457c4cbc
EB
3434 proc_net_remove(&init_net, "ip_vs_stats");
3435 proc_net_remove(&init_net, "ip_vs");
9a812198 3436 ip_vs_genl_unregister();
1da177e4
LT
3437 nf_unregister_sockopt(&ip_vs_sockopts);
3438 LeaveFunction(2);
3439}