]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/netfilter/ipvs/ip_vs_ctl.c
ipvs: netfilter connection tracking changes
[net-next-2.6.git] / net / netfilter / ipvs / ip_vs_ctl.c
CommitLineData
1da177e4
LT
1/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
1da177e4
LT
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
9aada7ac
HE
21#define KMSG_COMPONENT "IPVS"
22#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
1da177e4
LT
24#include <linux/module.h>
25#include <linux/init.h>
26#include <linux/types.h>
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/fs.h>
29#include <linux/sysctl.h>
30#include <linux/proc_fs.h>
31#include <linux/workqueue.h>
32#include <linux/swap.h>
1da177e4 33#include <linux/seq_file.h>
5a0e3ad6 34#include <linux/slab.h>
1da177e4
LT
35
36#include <linux/netfilter.h>
37#include <linux/netfilter_ipv4.h>
14cc3e2b 38#include <linux/mutex.h>
1da177e4 39
457c4cbc 40#include <net/net_namespace.h>
1da177e4 41#include <net/ip.h>
09571c7a
VB
42#ifdef CONFIG_IP_VS_IPV6
43#include <net/ipv6.h>
44#include <net/ip6_route.h>
45#endif
14c85021 46#include <net/route.h>
1da177e4 47#include <net/sock.h>
9a812198 48#include <net/genetlink.h>
1da177e4
LT
49
50#include <asm/uaccess.h>
51
52#include <net/ip_vs.h>
53
54/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
14cc3e2b 55static DEFINE_MUTEX(__ip_vs_mutex);
1da177e4
LT
56
57/* lock for service table */
58static DEFINE_RWLOCK(__ip_vs_svc_lock);
59
60/* lock for table with the real services */
61static DEFINE_RWLOCK(__ip_vs_rs_lock);
62
63/* lock for state and timeout tables */
4f72816e 64static DEFINE_SPINLOCK(ip_vs_securetcp_lock);
1da177e4
LT
65
66/* lock for drop entry handling */
67static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
68
69/* lock for drop packet handling */
70static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
71
72/* 1/rate drop and drop-entry variables */
73int ip_vs_drop_rate = 0;
74int ip_vs_drop_counter = 0;
75static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
76
77/* number of virtual services */
78static int ip_vs_num_services = 0;
79
80/* sysctl variables */
81static int sysctl_ip_vs_drop_entry = 0;
82static int sysctl_ip_vs_drop_packet = 0;
83static int sysctl_ip_vs_secure_tcp = 0;
84static int sysctl_ip_vs_amemthresh = 1024;
85static int sysctl_ip_vs_am_droprate = 10;
86int sysctl_ip_vs_cache_bypass = 0;
87int sysctl_ip_vs_expire_nodest_conn = 0;
88int sysctl_ip_vs_expire_quiescent_template = 0;
89int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
90int sysctl_ip_vs_nat_icmp_send = 0;
f4bc17cd
JA
91#ifdef CONFIG_IP_VS_NFCT
92int sysctl_ip_vs_conntrack;
93#endif
1da177e4
LT
94
95
96#ifdef CONFIG_IP_VS_DEBUG
97static int sysctl_ip_vs_debug_level = 0;
98
99int ip_vs_get_debug_level(void)
100{
101 return sysctl_ip_vs_debug_level;
102}
103#endif
104
09571c7a
VB
105#ifdef CONFIG_IP_VS_IPV6
106/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
107static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
108{
109 struct rt6_info *rt;
110 struct flowi fl = {
111 .oif = 0,
112 .nl_u = {
113 .ip6_u = {
114 .daddr = *addr,
115 .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
116 };
117
118 rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
119 if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
120 return 1;
121
122 return 0;
123}
124#endif
1da177e4 125/*
af9debd4
JA
126 * update_defense_level is called from keventd and from sysctl,
127 * so it needs to protect itself from softirqs
1da177e4
LT
128 */
129static void update_defense_level(void)
130{
131 struct sysinfo i;
132 static int old_secure_tcp = 0;
133 int availmem;
134 int nomem;
135 int to_change = -1;
136
137 /* we only count free and buffered memory (in pages) */
138 si_meminfo(&i);
139 availmem = i.freeram + i.bufferram;
140 /* however in linux 2.5 the i.bufferram is total page cache size,
141 we need adjust it */
142 /* si_swapinfo(&i); */
143 /* availmem = availmem - (i.totalswap - i.freeswap); */
144
145 nomem = (availmem < sysctl_ip_vs_amemthresh);
146
af9debd4
JA
147 local_bh_disable();
148
1da177e4
LT
149 /* drop_entry */
150 spin_lock(&__ip_vs_dropentry_lock);
151 switch (sysctl_ip_vs_drop_entry) {
152 case 0:
153 atomic_set(&ip_vs_dropentry, 0);
154 break;
155 case 1:
156 if (nomem) {
157 atomic_set(&ip_vs_dropentry, 1);
158 sysctl_ip_vs_drop_entry = 2;
159 } else {
160 atomic_set(&ip_vs_dropentry, 0);
161 }
162 break;
163 case 2:
164 if (nomem) {
165 atomic_set(&ip_vs_dropentry, 1);
166 } else {
167 atomic_set(&ip_vs_dropentry, 0);
168 sysctl_ip_vs_drop_entry = 1;
169 };
170 break;
171 case 3:
172 atomic_set(&ip_vs_dropentry, 1);
173 break;
174 }
175 spin_unlock(&__ip_vs_dropentry_lock);
176
177 /* drop_packet */
178 spin_lock(&__ip_vs_droppacket_lock);
179 switch (sysctl_ip_vs_drop_packet) {
180 case 0:
181 ip_vs_drop_rate = 0;
182 break;
183 case 1:
184 if (nomem) {
185 ip_vs_drop_rate = ip_vs_drop_counter
186 = sysctl_ip_vs_amemthresh /
187 (sysctl_ip_vs_amemthresh-availmem);
188 sysctl_ip_vs_drop_packet = 2;
189 } else {
190 ip_vs_drop_rate = 0;
191 }
192 break;
193 case 2:
194 if (nomem) {
195 ip_vs_drop_rate = ip_vs_drop_counter
196 = sysctl_ip_vs_amemthresh /
197 (sysctl_ip_vs_amemthresh-availmem);
198 } else {
199 ip_vs_drop_rate = 0;
200 sysctl_ip_vs_drop_packet = 1;
201 }
202 break;
203 case 3:
204 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
205 break;
206 }
207 spin_unlock(&__ip_vs_droppacket_lock);
208
209 /* secure_tcp */
4f72816e 210 spin_lock(&ip_vs_securetcp_lock);
1da177e4
LT
211 switch (sysctl_ip_vs_secure_tcp) {
212 case 0:
213 if (old_secure_tcp >= 2)
214 to_change = 0;
215 break;
216 case 1:
217 if (nomem) {
218 if (old_secure_tcp < 2)
219 to_change = 1;
220 sysctl_ip_vs_secure_tcp = 2;
221 } else {
222 if (old_secure_tcp >= 2)
223 to_change = 0;
224 }
225 break;
226 case 2:
227 if (nomem) {
228 if (old_secure_tcp < 2)
229 to_change = 1;
230 } else {
231 if (old_secure_tcp >= 2)
232 to_change = 0;
233 sysctl_ip_vs_secure_tcp = 1;
234 }
235 break;
236 case 3:
237 if (old_secure_tcp < 2)
238 to_change = 1;
239 break;
240 }
241 old_secure_tcp = sysctl_ip_vs_secure_tcp;
242 if (to_change >= 0)
243 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
4f72816e 244 spin_unlock(&ip_vs_securetcp_lock);
af9debd4
JA
245
246 local_bh_enable();
1da177e4
LT
247}
248
249
250/*
251 * Timer for checking the defense
252 */
253#define DEFENSE_TIMER_PERIOD 1*HZ
c4028958
DH
254static void defense_work_handler(struct work_struct *work);
255static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
1da177e4 256
c4028958 257static void defense_work_handler(struct work_struct *work)
1da177e4
LT
258{
259 update_defense_level();
260 if (atomic_read(&ip_vs_dropentry))
261 ip_vs_random_dropentry();
262
263 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
264}
265
266int
267ip_vs_use_count_inc(void)
268{
269 return try_module_get(THIS_MODULE);
270}
271
272void
273ip_vs_use_count_dec(void)
274{
275 module_put(THIS_MODULE);
276}
277
278
279/*
280 * Hash table: for virtual service lookups
281 */
282#define IP_VS_SVC_TAB_BITS 8
283#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
284#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
285
286/* the service table hashed by <protocol, addr, port> */
287static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
288/* the service table hashed by fwmark */
289static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
290
291/*
292 * Hash table: for real service lookups
293 */
294#define IP_VS_RTAB_BITS 4
295#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
296#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
297
298static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
299
300/*
301 * Trash for destinations
302 */
303static LIST_HEAD(ip_vs_dest_trash);
304
305/*
306 * FTP & NULL virtual service counters
307 */
308static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
309static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
310
311
312/*
313 * Returns hash value for virtual service
314 */
315static __inline__ unsigned
b18610de
JV
316ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
317 __be16 port)
1da177e4
LT
318{
319 register unsigned porth = ntohs(port);
b18610de 320 __be32 addr_fold = addr->ip;
1da177e4 321
b18610de
JV
322#ifdef CONFIG_IP_VS_IPV6
323 if (af == AF_INET6)
324 addr_fold = addr->ip6[0]^addr->ip6[1]^
325 addr->ip6[2]^addr->ip6[3];
326#endif
327
328 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
1da177e4
LT
329 & IP_VS_SVC_TAB_MASK;
330}
331
332/*
333 * Returns hash value of fwmark for virtual service lookup
334 */
335static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
336{
337 return fwmark & IP_VS_SVC_TAB_MASK;
338}
339
340/*
341 * Hashes a service in the ip_vs_svc_table by <proto,addr,port>
342 * or in the ip_vs_svc_fwm_table by fwmark.
343 * Should be called with locked tables.
344 */
345static int ip_vs_svc_hash(struct ip_vs_service *svc)
346{
347 unsigned hash;
348
349 if (svc->flags & IP_VS_SVC_F_HASHED) {
1e3e238e
HE
350 pr_err("%s(): request for already hashed, called from %pF\n",
351 __func__, __builtin_return_address(0));
1da177e4
LT
352 return 0;
353 }
354
355 if (svc->fwmark == 0) {
356 /*
357 * Hash it by <protocol,addr,port> in ip_vs_svc_table
358 */
b18610de 359 hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
e7ade46a 360 svc->port);
1da177e4
LT
361 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
362 } else {
363 /*
364 * Hash it by fwmark in ip_vs_svc_fwm_table
365 */
366 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
367 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
368 }
369
370 svc->flags |= IP_VS_SVC_F_HASHED;
371 /* increase its refcnt because it is referenced by the svc table */
372 atomic_inc(&svc->refcnt);
373 return 1;
374}
375
376
377/*
378 * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
379 * Should be called with locked tables.
380 */
381static int ip_vs_svc_unhash(struct ip_vs_service *svc)
382{
383 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
1e3e238e
HE
384 pr_err("%s(): request for unhash flagged, called from %pF\n",
385 __func__, __builtin_return_address(0));
1da177e4
LT
386 return 0;
387 }
388
389 if (svc->fwmark == 0) {
390 /* Remove it from the ip_vs_svc_table table */
391 list_del(&svc->s_list);
392 } else {
393 /* Remove it from the ip_vs_svc_fwm_table table */
394 list_del(&svc->f_list);
395 }
396
397 svc->flags &= ~IP_VS_SVC_F_HASHED;
398 atomic_dec(&svc->refcnt);
399 return 1;
400}
401
402
403/*
404 * Get service by {proto,addr,port} in the service table.
405 */
b18610de
JV
406static inline struct ip_vs_service *
407__ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
408 __be16 vport)
1da177e4
LT
409{
410 unsigned hash;
411 struct ip_vs_service *svc;
412
413 /* Check for "full" addressed entries */
b18610de 414 hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
1da177e4
LT
415
416 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
b18610de
JV
417 if ((svc->af == af)
418 && ip_vs_addr_equal(af, &svc->addr, vaddr)
1da177e4
LT
419 && (svc->port == vport)
420 && (svc->protocol == protocol)) {
421 /* HIT */
422 atomic_inc(&svc->usecnt);
423 return svc;
424 }
425 }
426
427 return NULL;
428}
429
430
431/*
432 * Get service by {fwmark} in the service table.
433 */
b18610de
JV
434static inline struct ip_vs_service *
435__ip_vs_svc_fwm_get(int af, __u32 fwmark)
1da177e4
LT
436{
437 unsigned hash;
438 struct ip_vs_service *svc;
439
440 /* Check for fwmark addressed entries */
441 hash = ip_vs_svc_fwm_hashkey(fwmark);
442
443 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
b18610de 444 if (svc->fwmark == fwmark && svc->af == af) {
1da177e4
LT
445 /* HIT */
446 atomic_inc(&svc->usecnt);
447 return svc;
448 }
449 }
450
451 return NULL;
452}
453
454struct ip_vs_service *
3c2e0505
JV
455ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
456 const union nf_inet_addr *vaddr, __be16 vport)
1da177e4
LT
457{
458 struct ip_vs_service *svc;
3c2e0505 459
1da177e4
LT
460 read_lock(&__ip_vs_svc_lock);
461
462 /*
463 * Check the table hashed by fwmark first
464 */
3c2e0505 465 if (fwmark && (svc = __ip_vs_svc_fwm_get(af, fwmark)))
1da177e4
LT
466 goto out;
467
468 /*
469 * Check the table hashed by <protocol,addr,port>
470 * for "full" addressed entries
471 */
3c2e0505 472 svc = __ip_vs_service_get(af, protocol, vaddr, vport);
1da177e4
LT
473
474 if (svc == NULL
475 && protocol == IPPROTO_TCP
476 && atomic_read(&ip_vs_ftpsvc_counter)
477 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
478 /*
479 * Check if ftp service entry exists, the packet
480 * might belong to FTP data connections.
481 */
3c2e0505 482 svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT);
1da177e4
LT
483 }
484
485 if (svc == NULL
486 && atomic_read(&ip_vs_nullsvc_counter)) {
487 /*
488 * Check if the catch-all port (port zero) exists
489 */
3c2e0505 490 svc = __ip_vs_service_get(af, protocol, vaddr, 0);
1da177e4
LT
491 }
492
493 out:
494 read_unlock(&__ip_vs_svc_lock);
495
3c2e0505
JV
496 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
497 fwmark, ip_vs_proto_name(protocol),
498 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
499 svc ? "hit" : "not hit");
1da177e4
LT
500
501 return svc;
502}
503
504
505static inline void
506__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
507{
508 atomic_inc(&svc->refcnt);
509 dest->svc = svc;
510}
511
512static inline void
513__ip_vs_unbind_svc(struct ip_vs_dest *dest)
514{
515 struct ip_vs_service *svc = dest->svc;
516
517 dest->svc = NULL;
518 if (atomic_dec_and_test(&svc->refcnt))
519 kfree(svc);
520}
521
522
523/*
524 * Returns hash value for real service
525 */
7937df15
JV
526static inline unsigned ip_vs_rs_hashkey(int af,
527 const union nf_inet_addr *addr,
528 __be16 port)
1da177e4
LT
529{
530 register unsigned porth = ntohs(port);
7937df15
JV
531 __be32 addr_fold = addr->ip;
532
533#ifdef CONFIG_IP_VS_IPV6
534 if (af == AF_INET6)
535 addr_fold = addr->ip6[0]^addr->ip6[1]^
536 addr->ip6[2]^addr->ip6[3];
537#endif
1da177e4 538
7937df15 539 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
1da177e4
LT
540 & IP_VS_RTAB_MASK;
541}
542
543/*
544 * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
545 * should be called with locked tables.
546 */
547static int ip_vs_rs_hash(struct ip_vs_dest *dest)
548{
549 unsigned hash;
550
551 if (!list_empty(&dest->d_list)) {
552 return 0;
553 }
554
555 /*
556 * Hash by proto,addr,port,
557 * which are the parameters of the real service.
558 */
7937df15
JV
559 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
560
1da177e4
LT
561 list_add(&dest->d_list, &ip_vs_rtable[hash]);
562
563 return 1;
564}
565
566/*
567 * UNhashes ip_vs_dest from ip_vs_rtable.
568 * should be called with locked tables.
569 */
570static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
571{
572 /*
573 * Remove it from the ip_vs_rtable table.
574 */
575 if (!list_empty(&dest->d_list)) {
576 list_del(&dest->d_list);
577 INIT_LIST_HEAD(&dest->d_list);
578 }
579
580 return 1;
581}
582
583/*
584 * Lookup real service by <proto,addr,port> in the real service table.
585 */
586struct ip_vs_dest *
7937df15
JV
587ip_vs_lookup_real_service(int af, __u16 protocol,
588 const union nf_inet_addr *daddr,
589 __be16 dport)
1da177e4
LT
590{
591 unsigned hash;
592 struct ip_vs_dest *dest;
593
594 /*
595 * Check for "full" addressed entries
596 * Return the first found entry
597 */
7937df15 598 hash = ip_vs_rs_hashkey(af, daddr, dport);
1da177e4
LT
599
600 read_lock(&__ip_vs_rs_lock);
601 list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
7937df15
JV
602 if ((dest->af == af)
603 && ip_vs_addr_equal(af, &dest->addr, daddr)
1da177e4
LT
604 && (dest->port == dport)
605 && ((dest->protocol == protocol) ||
606 dest->vfwmark)) {
607 /* HIT */
608 read_unlock(&__ip_vs_rs_lock);
609 return dest;
610 }
611 }
612 read_unlock(&__ip_vs_rs_lock);
613
614 return NULL;
615}
616
617/*
618 * Lookup destination by {addr,port} in the given service
619 */
620static struct ip_vs_dest *
7937df15
JV
621ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
622 __be16 dport)
1da177e4
LT
623{
624 struct ip_vs_dest *dest;
625
626 /*
627 * Find the destination for the given service
628 */
629 list_for_each_entry(dest, &svc->destinations, n_list) {
7937df15
JV
630 if ((dest->af == svc->af)
631 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
632 && (dest->port == dport)) {
1da177e4
LT
633 /* HIT */
634 return dest;
635 }
636 }
637
638 return NULL;
639}
640
1e356f9c
RB
641/*
642 * Find destination by {daddr,dport,vaddr,protocol}
643 * Cretaed to be used in ip_vs_process_message() in
644 * the backup synchronization daemon. It finds the
645 * destination to be bound to the received connection
646 * on the backup.
647 *
648 * ip_vs_lookup_real_service() looked promissing, but
649 * seems not working as expected.
650 */
7937df15
JV
651struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
652 __be16 dport,
653 const union nf_inet_addr *vaddr,
654 __be16 vport, __u16 protocol)
1e356f9c
RB
655{
656 struct ip_vs_dest *dest;
657 struct ip_vs_service *svc;
658
7937df15 659 svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
1e356f9c
RB
660 if (!svc)
661 return NULL;
662 dest = ip_vs_lookup_dest(svc, daddr, dport);
663 if (dest)
664 atomic_inc(&dest->refcnt);
665 ip_vs_service_put(svc);
666 return dest;
667}
1da177e4
LT
668
669/*
670 * Lookup dest by {svc,addr,port} in the destination trash.
671 * The destination trash is used to hold the destinations that are removed
672 * from the service table but are still referenced by some conn entries.
673 * The reason to add the destination trash is when the dest is temporary
674 * down (either by administrator or by monitor program), the dest can be
675 * picked back from the trash, the remaining connections to the dest can
676 * continue, and the counting information of the dest is also useful for
677 * scheduling.
678 */
679static struct ip_vs_dest *
7937df15
JV
680ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
681 __be16 dport)
1da177e4
LT
682{
683 struct ip_vs_dest *dest, *nxt;
684
685 /*
686 * Find the destination in trash
687 */
688 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
7937df15
JV
689 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
690 "dest->refcnt=%d\n",
691 dest->vfwmark,
692 IP_VS_DBG_ADDR(svc->af, &dest->addr),
693 ntohs(dest->port),
694 atomic_read(&dest->refcnt));
695 if (dest->af == svc->af &&
696 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
1da177e4
LT
697 dest->port == dport &&
698 dest->vfwmark == svc->fwmark &&
699 dest->protocol == svc->protocol &&
700 (svc->fwmark ||
7937df15 701 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
1da177e4
LT
702 dest->vport == svc->port))) {
703 /* HIT */
704 return dest;
705 }
706
707 /*
708 * Try to purge the destination from trash if not referenced
709 */
710 if (atomic_read(&dest->refcnt) == 1) {
7937df15
JV
711 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
712 "from trash\n",
713 dest->vfwmark,
714 IP_VS_DBG_ADDR(svc->af, &dest->addr),
715 ntohs(dest->port));
1da177e4
LT
716 list_del(&dest->n_list);
717 ip_vs_dst_reset(dest);
718 __ip_vs_unbind_svc(dest);
719 kfree(dest);
720 }
721 }
722
723 return NULL;
724}
725
726
727/*
728 * Clean up all the destinations in the trash
729 * Called by the ip_vs_control_cleanup()
730 *
731 * When the ip_vs_control_clearup is activated by ipvs module exit,
732 * the service tables must have been flushed and all the connections
733 * are expired, and the refcnt of each destination in the trash must
734 * be 1, so we simply release them here.
735 */
736static void ip_vs_trash_cleanup(void)
737{
738 struct ip_vs_dest *dest, *nxt;
739
740 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
741 list_del(&dest->n_list);
742 ip_vs_dst_reset(dest);
743 __ip_vs_unbind_svc(dest);
744 kfree(dest);
745 }
746}
747
748
749static void
750ip_vs_zero_stats(struct ip_vs_stats *stats)
751{
752 spin_lock_bh(&stats->lock);
e93615d0 753
e9c0ce23 754 memset(&stats->ustats, 0, sizeof(stats->ustats));
1da177e4 755 ip_vs_zero_estimator(stats);
e93615d0 756
3a14a313 757 spin_unlock_bh(&stats->lock);
1da177e4
LT
758}
759
760/*
761 * Update a destination in the given service
762 */
763static void
764__ip_vs_update_dest(struct ip_vs_service *svc,
c860c6b1 765 struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
766{
767 int conn_flags;
768
769 /* set the weight and the flags */
770 atomic_set(&dest->weight, udest->weight);
3575792e
JA
771 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
772 conn_flags |= IP_VS_CONN_F_INACTIVE;
1da177e4
LT
773
774 /* check if local node and update the flags */
09571c7a
VB
775#ifdef CONFIG_IP_VS_IPV6
776 if (svc->af == AF_INET6) {
777 if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) {
778 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
779 | IP_VS_CONN_F_LOCALNODE;
780 }
781 } else
782#endif
783 if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
784 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
785 | IP_VS_CONN_F_LOCALNODE;
786 }
1da177e4
LT
787
788 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
3575792e 789 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
1da177e4
LT
790 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
791 } else {
792 /*
793 * Put the real service in ip_vs_rtable if not present.
794 * For now only for NAT!
795 */
796 write_lock_bh(&__ip_vs_rs_lock);
797 ip_vs_rs_hash(dest);
798 write_unlock_bh(&__ip_vs_rs_lock);
799 }
800 atomic_set(&dest->conn_flags, conn_flags);
801
802 /* bind the service */
803 if (!dest->svc) {
804 __ip_vs_bind_svc(dest, svc);
805 } else {
806 if (dest->svc != svc) {
807 __ip_vs_unbind_svc(dest);
808 ip_vs_zero_stats(&dest->stats);
809 __ip_vs_bind_svc(dest, svc);
810 }
811 }
812
813 /* set the dest status flags */
814 dest->flags |= IP_VS_DEST_F_AVAILABLE;
815
816 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
817 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
818 dest->u_threshold = udest->u_threshold;
819 dest->l_threshold = udest->l_threshold;
820}
821
822
823/*
824 * Create a destination for the given service
825 */
826static int
c860c6b1 827ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
1da177e4
LT
828 struct ip_vs_dest **dest_p)
829{
830 struct ip_vs_dest *dest;
831 unsigned atype;
832
833 EnterFunction(2);
834
09571c7a
VB
835#ifdef CONFIG_IP_VS_IPV6
836 if (svc->af == AF_INET6) {
837 atype = ipv6_addr_type(&udest->addr.in6);
3bfb92f4
SW
838 if ((!(atype & IPV6_ADDR_UNICAST) ||
839 atype & IPV6_ADDR_LINKLOCAL) &&
09571c7a
VB
840 !__ip_vs_addr_is_local_v6(&udest->addr.in6))
841 return -EINVAL;
842 } else
843#endif
844 {
845 atype = inet_addr_type(&init_net, udest->addr.ip);
846 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
847 return -EINVAL;
848 }
1da177e4 849
dee06e47 850 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
1da177e4 851 if (dest == NULL) {
1e3e238e 852 pr_err("%s(): no memory.\n", __func__);
1da177e4
LT
853 return -ENOMEM;
854 }
1da177e4 855
c860c6b1 856 dest->af = svc->af;
1da177e4 857 dest->protocol = svc->protocol;
c860c6b1 858 dest->vaddr = svc->addr;
1da177e4
LT
859 dest->vport = svc->port;
860 dest->vfwmark = svc->fwmark;
c860c6b1 861 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
1da177e4
LT
862 dest->port = udest->port;
863
864 atomic_set(&dest->activeconns, 0);
865 atomic_set(&dest->inactconns, 0);
866 atomic_set(&dest->persistconns, 0);
867 atomic_set(&dest->refcnt, 0);
868
869 INIT_LIST_HEAD(&dest->d_list);
870 spin_lock_init(&dest->dst_lock);
871 spin_lock_init(&dest->stats.lock);
872 __ip_vs_update_dest(svc, dest, udest);
873 ip_vs_new_estimator(&dest->stats);
874
875 *dest_p = dest;
876
877 LeaveFunction(2);
878 return 0;
879}
880
881
882/*
883 * Add a destination into an existing service
884 */
885static int
c860c6b1 886ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
887{
888 struct ip_vs_dest *dest;
c860c6b1 889 union nf_inet_addr daddr;
014d730d 890 __be16 dport = udest->port;
1da177e4
LT
891 int ret;
892
893 EnterFunction(2);
894
895 if (udest->weight < 0) {
1e3e238e 896 pr_err("%s(): server weight less than zero\n", __func__);
1da177e4
LT
897 return -ERANGE;
898 }
899
900 if (udest->l_threshold > udest->u_threshold) {
1e3e238e
HE
901 pr_err("%s(): lower threshold is higher than upper threshold\n",
902 __func__);
1da177e4
LT
903 return -ERANGE;
904 }
905
c860c6b1
JV
906 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
907
1da177e4
LT
908 /*
909 * Check if the dest already exists in the list
910 */
7937df15
JV
911 dest = ip_vs_lookup_dest(svc, &daddr, dport);
912
1da177e4 913 if (dest != NULL) {
1e3e238e 914 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
1da177e4
LT
915 return -EEXIST;
916 }
917
918 /*
919 * Check if the dest already exists in the trash and
920 * is from the same service
921 */
7937df15
JV
922 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
923
1da177e4 924 if (dest != NULL) {
cfc78c5a
JV
925 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
926 "dest->refcnt=%d, service %u/%s:%u\n",
927 IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
928 atomic_read(&dest->refcnt),
929 dest->vfwmark,
930 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
931 ntohs(dest->vport));
932
1da177e4
LT
933 __ip_vs_update_dest(svc, dest, udest);
934
935 /*
936 * Get the destination from the trash
937 */
938 list_del(&dest->n_list);
939
940 ip_vs_new_estimator(&dest->stats);
941
942 write_lock_bh(&__ip_vs_svc_lock);
943
944 /*
945 * Wait until all other svc users go away.
946 */
947 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
948
949 list_add(&dest->n_list, &svc->destinations);
950 svc->num_dests++;
951
952 /* call the update_service function of its scheduler */
82dfb6f3
SW
953 if (svc->scheduler->update_service)
954 svc->scheduler->update_service(svc);
1da177e4
LT
955
956 write_unlock_bh(&__ip_vs_svc_lock);
957 return 0;
958 }
959
960 /*
961 * Allocate and initialize the dest structure
962 */
963 ret = ip_vs_new_dest(svc, udest, &dest);
964 if (ret) {
965 return ret;
966 }
967
968 /*
969 * Add the dest entry into the list
970 */
971 atomic_inc(&dest->refcnt);
972
973 write_lock_bh(&__ip_vs_svc_lock);
974
975 /*
976 * Wait until all other svc users go away.
977 */
978 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
979
980 list_add(&dest->n_list, &svc->destinations);
981 svc->num_dests++;
982
983 /* call the update_service function of its scheduler */
82dfb6f3
SW
984 if (svc->scheduler->update_service)
985 svc->scheduler->update_service(svc);
1da177e4
LT
986
987 write_unlock_bh(&__ip_vs_svc_lock);
988
989 LeaveFunction(2);
990
991 return 0;
992}
993
994
995/*
996 * Edit a destination in the given service
997 */
998static int
c860c6b1 999ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
1000{
1001 struct ip_vs_dest *dest;
c860c6b1 1002 union nf_inet_addr daddr;
014d730d 1003 __be16 dport = udest->port;
1da177e4
LT
1004
1005 EnterFunction(2);
1006
1007 if (udest->weight < 0) {
1e3e238e 1008 pr_err("%s(): server weight less than zero\n", __func__);
1da177e4
LT
1009 return -ERANGE;
1010 }
1011
1012 if (udest->l_threshold > udest->u_threshold) {
1e3e238e
HE
1013 pr_err("%s(): lower threshold is higher than upper threshold\n",
1014 __func__);
1da177e4
LT
1015 return -ERANGE;
1016 }
1017
c860c6b1
JV
1018 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
1019
1da177e4
LT
1020 /*
1021 * Lookup the destination list
1022 */
7937df15
JV
1023 dest = ip_vs_lookup_dest(svc, &daddr, dport);
1024
1da177e4 1025 if (dest == NULL) {
1e3e238e 1026 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
1da177e4
LT
1027 return -ENOENT;
1028 }
1029
1030 __ip_vs_update_dest(svc, dest, udest);
1031
1032 write_lock_bh(&__ip_vs_svc_lock);
1033
1034 /* Wait until all other svc users go away */
cae7ca3d 1035 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1da177e4
LT
1036
1037 /* call the update_service, because server weight may be changed */
82dfb6f3
SW
1038 if (svc->scheduler->update_service)
1039 svc->scheduler->update_service(svc);
1da177e4
LT
1040
1041 write_unlock_bh(&__ip_vs_svc_lock);
1042
1043 LeaveFunction(2);
1044
1045 return 0;
1046}
1047
1048
1049/*
1050 * Delete a destination (must be already unlinked from the service)
1051 */
1052static void __ip_vs_del_dest(struct ip_vs_dest *dest)
1053{
1054 ip_vs_kill_estimator(&dest->stats);
1055
1056 /*
1057 * Remove it from the d-linked list with the real services.
1058 */
1059 write_lock_bh(&__ip_vs_rs_lock);
1060 ip_vs_rs_unhash(dest);
1061 write_unlock_bh(&__ip_vs_rs_lock);
1062
1063 /*
1064 * Decrease the refcnt of the dest, and free the dest
1065 * if nobody refers to it (refcnt=0). Otherwise, throw
1066 * the destination into the trash.
1067 */
1068 if (atomic_dec_and_test(&dest->refcnt)) {
1069 ip_vs_dst_reset(dest);
1070 /* simply decrease svc->refcnt here, let the caller check
1071 and release the service if nobody refers to it.
1072 Only user context can release destination and service,
1073 and only one user context can update virtual service at a
1074 time, so the operation here is OK */
1075 atomic_dec(&dest->svc->refcnt);
1076 kfree(dest);
1077 } else {
cfc78c5a
JV
1078 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1079 "dest->refcnt=%d\n",
1080 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1081 ntohs(dest->port),
1082 atomic_read(&dest->refcnt));
1da177e4
LT
1083 list_add(&dest->n_list, &ip_vs_dest_trash);
1084 atomic_inc(&dest->refcnt);
1085 }
1086}
1087
1088
1089/*
1090 * Unlink a destination from the given service
1091 */
1092static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1093 struct ip_vs_dest *dest,
1094 int svcupd)
1095{
1096 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1097
1098 /*
1099 * Remove it from the d-linked destination list.
1100 */
1101 list_del(&dest->n_list);
1102 svc->num_dests--;
82dfb6f3
SW
1103
1104 /*
1105 * Call the update_service function of its scheduler
1106 */
1107 if (svcupd && svc->scheduler->update_service)
1108 svc->scheduler->update_service(svc);
1da177e4
LT
1109}
1110
1111
1112/*
1113 * Delete a destination server in the given service
1114 */
1115static int
c860c6b1 1116ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
1117{
1118 struct ip_vs_dest *dest;
014d730d 1119 __be16 dport = udest->port;
1da177e4
LT
1120
1121 EnterFunction(2);
1122
7937df15 1123 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
c860c6b1 1124
1da177e4 1125 if (dest == NULL) {
1e3e238e 1126 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1da177e4
LT
1127 return -ENOENT;
1128 }
1129
1130 write_lock_bh(&__ip_vs_svc_lock);
1131
1132 /*
1133 * Wait until all other svc users go away.
1134 */
1135 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1136
1137 /*
1138 * Unlink dest from the service
1139 */
1140 __ip_vs_unlink_dest(svc, dest, 1);
1141
1142 write_unlock_bh(&__ip_vs_svc_lock);
1143
1144 /*
1145 * Delete the destination
1146 */
1147 __ip_vs_del_dest(dest);
1148
1149 LeaveFunction(2);
1150
1151 return 0;
1152}
1153
1154
1155/*
1156 * Add a service into the service hash table
1157 */
1158static int
c860c6b1
JV
1159ip_vs_add_service(struct ip_vs_service_user_kern *u,
1160 struct ip_vs_service **svc_p)
1da177e4
LT
1161{
1162 int ret = 0;
1163 struct ip_vs_scheduler *sched = NULL;
1164 struct ip_vs_service *svc = NULL;
1165
1166 /* increase the module use count */
1167 ip_vs_use_count_inc();
1168
1169 /* Lookup the scheduler by 'u->sched_name' */
1170 sched = ip_vs_scheduler_get(u->sched_name);
1171 if (sched == NULL) {
1e3e238e 1172 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1da177e4
LT
1173 ret = -ENOENT;
1174 goto out_mod_dec;
1175 }
1176
f94fd041 1177#ifdef CONFIG_IP_VS_IPV6
48148938
JV
1178 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1179 ret = -EINVAL;
1180 goto out_err;
f94fd041
JV
1181 }
1182#endif
1183
dee06e47 1184 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1da177e4 1185 if (svc == NULL) {
1e3e238e 1186 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1da177e4
LT
1187 ret = -ENOMEM;
1188 goto out_err;
1189 }
1da177e4
LT
1190
1191 /* I'm the first user of the service */
1192 atomic_set(&svc->usecnt, 1);
1193 atomic_set(&svc->refcnt, 0);
1194
c860c6b1 1195 svc->af = u->af;
1da177e4 1196 svc->protocol = u->protocol;
c860c6b1 1197 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1da177e4
LT
1198 svc->port = u->port;
1199 svc->fwmark = u->fwmark;
1200 svc->flags = u->flags;
1201 svc->timeout = u->timeout * HZ;
1202 svc->netmask = u->netmask;
1203
1204 INIT_LIST_HEAD(&svc->destinations);
1205 rwlock_init(&svc->sched_lock);
1206 spin_lock_init(&svc->stats.lock);
1207
1208 /* Bind the scheduler */
1209 ret = ip_vs_bind_scheduler(svc, sched);
1210 if (ret)
1211 goto out_err;
1212 sched = NULL;
1213
1214 /* Update the virtual service counters */
1215 if (svc->port == FTPPORT)
1216 atomic_inc(&ip_vs_ftpsvc_counter);
1217 else if (svc->port == 0)
1218 atomic_inc(&ip_vs_nullsvc_counter);
1219
1220 ip_vs_new_estimator(&svc->stats);
f94fd041
JV
1221
1222 /* Count only IPv4 services for old get/setsockopt interface */
1223 if (svc->af == AF_INET)
1224 ip_vs_num_services++;
1da177e4
LT
1225
1226 /* Hash the service into the service table */
1227 write_lock_bh(&__ip_vs_svc_lock);
1228 ip_vs_svc_hash(svc);
1229 write_unlock_bh(&__ip_vs_svc_lock);
1230
1231 *svc_p = svc;
1232 return 0;
1233
1234 out_err:
1235 if (svc != NULL) {
1236 if (svc->scheduler)
1237 ip_vs_unbind_scheduler(svc);
1238 if (svc->inc) {
1239 local_bh_disable();
1240 ip_vs_app_inc_put(svc->inc);
1241 local_bh_enable();
1242 }
1243 kfree(svc);
1244 }
1245 ip_vs_scheduler_put(sched);
1246
1247 out_mod_dec:
1248 /* decrease the module use count */
1249 ip_vs_use_count_dec();
1250
1251 return ret;
1252}
1253
1254
1255/*
1256 * Edit a service and bind it with a new scheduler
1257 */
1258static int
c860c6b1 1259ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1da177e4
LT
1260{
1261 struct ip_vs_scheduler *sched, *old_sched;
1262 int ret = 0;
1263
1264 /*
1265 * Lookup the scheduler, by 'u->sched_name'
1266 */
1267 sched = ip_vs_scheduler_get(u->sched_name);
1268 if (sched == NULL) {
1e3e238e 1269 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1da177e4
LT
1270 return -ENOENT;
1271 }
1272 old_sched = sched;
1273
f94fd041 1274#ifdef CONFIG_IP_VS_IPV6
48148938
JV
1275 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1276 ret = -EINVAL;
1277 goto out;
f94fd041
JV
1278 }
1279#endif
1280
1da177e4
LT
1281 write_lock_bh(&__ip_vs_svc_lock);
1282
1283 /*
1284 * Wait until all other svc users go away.
1285 */
1286 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1287
1288 /*
1289 * Set the flags and timeout value
1290 */
1291 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1292 svc->timeout = u->timeout * HZ;
1293 svc->netmask = u->netmask;
1294
1295 old_sched = svc->scheduler;
1296 if (sched != old_sched) {
1297 /*
1298 * Unbind the old scheduler
1299 */
1300 if ((ret = ip_vs_unbind_scheduler(svc))) {
1301 old_sched = sched;
9e691ed6 1302 goto out_unlock;
1da177e4
LT
1303 }
1304
1305 /*
1306 * Bind the new scheduler
1307 */
1308 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1309 /*
1310 * If ip_vs_bind_scheduler fails, restore the old
1311 * scheduler.
1312 * The main reason of failure is out of memory.
1313 *
1314 * The question is if the old scheduler can be
1315 * restored all the time. TODO: if it cannot be
1316 * restored some time, we must delete the service,
1317 * otherwise the system may crash.
1318 */
1319 ip_vs_bind_scheduler(svc, old_sched);
1320 old_sched = sched;
9e691ed6 1321 goto out_unlock;
1da177e4
LT
1322 }
1323 }
1324
9e691ed6 1325 out_unlock:
1da177e4 1326 write_unlock_bh(&__ip_vs_svc_lock);
8d5803bf 1327#ifdef CONFIG_IP_VS_IPV6
9e691ed6 1328 out:
8d5803bf 1329#endif
1da177e4
LT
1330
1331 if (old_sched)
1332 ip_vs_scheduler_put(old_sched);
1333
1334 return ret;
1335}
1336
1337
1338/*
1339 * Delete a service from the service list
1340 * - The service must be unlinked, unlocked and not referenced!
1341 * - We are called under _bh lock
1342 */
1343static void __ip_vs_del_service(struct ip_vs_service *svc)
1344{
1345 struct ip_vs_dest *dest, *nxt;
1346 struct ip_vs_scheduler *old_sched;
1347
f94fd041
JV
1348 /* Count only IPv4 services for old get/setsockopt interface */
1349 if (svc->af == AF_INET)
1350 ip_vs_num_services--;
1351
1da177e4
LT
1352 ip_vs_kill_estimator(&svc->stats);
1353
1354 /* Unbind scheduler */
1355 old_sched = svc->scheduler;
1356 ip_vs_unbind_scheduler(svc);
1357 if (old_sched)
1358 ip_vs_scheduler_put(old_sched);
1359
1360 /* Unbind app inc */
1361 if (svc->inc) {
1362 ip_vs_app_inc_put(svc->inc);
1363 svc->inc = NULL;
1364 }
1365
1366 /*
1367 * Unlink the whole destination list
1368 */
1369 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1370 __ip_vs_unlink_dest(svc, dest, 0);
1371 __ip_vs_del_dest(dest);
1372 }
1373
1374 /*
1375 * Update the virtual service counters
1376 */
1377 if (svc->port == FTPPORT)
1378 atomic_dec(&ip_vs_ftpsvc_counter);
1379 else if (svc->port == 0)
1380 atomic_dec(&ip_vs_nullsvc_counter);
1381
1382 /*
1383 * Free the service if nobody refers to it
1384 */
1385 if (atomic_read(&svc->refcnt) == 0)
1386 kfree(svc);
1387
1388 /* decrease the module use count */
1389 ip_vs_use_count_dec();
1390}
1391
1392/*
1393 * Delete a service from the service list
1394 */
1395static int ip_vs_del_service(struct ip_vs_service *svc)
1396{
1397 if (svc == NULL)
1398 return -EEXIST;
1399
1400 /*
1401 * Unhash it from the service table
1402 */
1403 write_lock_bh(&__ip_vs_svc_lock);
1404
1405 ip_vs_svc_unhash(svc);
1406
1407 /*
1408 * Wait until all the svc users go away.
1409 */
1410 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1411
1412 __ip_vs_del_service(svc);
1413
1414 write_unlock_bh(&__ip_vs_svc_lock);
1415
1416 return 0;
1417}
1418
1419
1420/*
1421 * Flush all the virtual services
1422 */
1423static int ip_vs_flush(void)
1424{
1425 int idx;
1426 struct ip_vs_service *svc, *nxt;
1427
1428 /*
1429 * Flush the service table hashed by <protocol,addr,port>
1430 */
1431 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1432 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1433 write_lock_bh(&__ip_vs_svc_lock);
1434 ip_vs_svc_unhash(svc);
1435 /*
1436 * Wait until all the svc users go away.
1437 */
1438 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1439 __ip_vs_del_service(svc);
1440 write_unlock_bh(&__ip_vs_svc_lock);
1441 }
1442 }
1443
1444 /*
1445 * Flush the service table hashed by fwmark
1446 */
1447 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1448 list_for_each_entry_safe(svc, nxt,
1449 &ip_vs_svc_fwm_table[idx], f_list) {
1450 write_lock_bh(&__ip_vs_svc_lock);
1451 ip_vs_svc_unhash(svc);
1452 /*
1453 * Wait until all the svc users go away.
1454 */
1455 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1456 __ip_vs_del_service(svc);
1457 write_unlock_bh(&__ip_vs_svc_lock);
1458 }
1459 }
1460
1461 return 0;
1462}
1463
1464
1465/*
1466 * Zero counters in a service or all services
1467 */
1468static int ip_vs_zero_service(struct ip_vs_service *svc)
1469{
1470 struct ip_vs_dest *dest;
1471
1472 write_lock_bh(&__ip_vs_svc_lock);
1473 list_for_each_entry(dest, &svc->destinations, n_list) {
1474 ip_vs_zero_stats(&dest->stats);
1475 }
1476 ip_vs_zero_stats(&svc->stats);
1477 write_unlock_bh(&__ip_vs_svc_lock);
1478 return 0;
1479}
1480
1481static int ip_vs_zero_all(void)
1482{
1483 int idx;
1484 struct ip_vs_service *svc;
1485
1486 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1487 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1488 ip_vs_zero_service(svc);
1489 }
1490 }
1491
1492 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1493 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1494 ip_vs_zero_service(svc);
1495 }
1496 }
1497
1498 ip_vs_zero_stats(&ip_vs_stats);
1499 return 0;
1500}
1501
1502
1503static int
8d65af78 1504proc_do_defense_mode(ctl_table *table, int write,
1da177e4
LT
1505 void __user *buffer, size_t *lenp, loff_t *ppos)
1506{
1507 int *valp = table->data;
1508 int val = *valp;
1509 int rc;
1510
8d65af78 1511 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1da177e4
LT
1512 if (write && (*valp != val)) {
1513 if ((*valp < 0) || (*valp > 3)) {
1514 /* Restore the correct value */
1515 *valp = val;
1516 } else {
1da177e4 1517 update_defense_level();
1da177e4
LT
1518 }
1519 }
1520 return rc;
1521}
1522
1523
1524static int
8d65af78 1525proc_do_sync_threshold(ctl_table *table, int write,
1da177e4
LT
1526 void __user *buffer, size_t *lenp, loff_t *ppos)
1527{
1528 int *valp = table->data;
1529 int val[2];
1530 int rc;
1531
1532 /* backup the value first */
1533 memcpy(val, valp, sizeof(val));
1534
8d65af78 1535 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1da177e4
LT
1536 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1537 /* Restore the correct value */
1538 memcpy(valp, val, sizeof(val));
1539 }
1540 return rc;
1541}
1542
1543
1544/*
1545 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1546 */
1547
1548static struct ctl_table vs_vars[] = {
1549 {
1da177e4
LT
1550 .procname = "amemthresh",
1551 .data = &sysctl_ip_vs_amemthresh,
1552 .maxlen = sizeof(int),
1553 .mode = 0644,
6d9f239a 1554 .proc_handler = proc_dointvec,
1da177e4
LT
1555 },
1556#ifdef CONFIG_IP_VS_DEBUG
1557 {
1da177e4
LT
1558 .procname = "debug_level",
1559 .data = &sysctl_ip_vs_debug_level,
1560 .maxlen = sizeof(int),
1561 .mode = 0644,
6d9f239a 1562 .proc_handler = proc_dointvec,
1da177e4
LT
1563 },
1564#endif
1565 {
1da177e4
LT
1566 .procname = "am_droprate",
1567 .data = &sysctl_ip_vs_am_droprate,
1568 .maxlen = sizeof(int),
1569 .mode = 0644,
6d9f239a 1570 .proc_handler = proc_dointvec,
1da177e4
LT
1571 },
1572 {
1da177e4
LT
1573 .procname = "drop_entry",
1574 .data = &sysctl_ip_vs_drop_entry,
1575 .maxlen = sizeof(int),
1576 .mode = 0644,
6d9f239a 1577 .proc_handler = proc_do_defense_mode,
1da177e4
LT
1578 },
1579 {
1da177e4
LT
1580 .procname = "drop_packet",
1581 .data = &sysctl_ip_vs_drop_packet,
1582 .maxlen = sizeof(int),
1583 .mode = 0644,
6d9f239a 1584 .proc_handler = proc_do_defense_mode,
1da177e4 1585 },
f4bc17cd
JA
1586#ifdef CONFIG_IP_VS_NFCT
1587 {
1588 .procname = "conntrack",
1589 .data = &sysctl_ip_vs_conntrack,
1590 .maxlen = sizeof(int),
1591 .mode = 0644,
1592 .proc_handler = &proc_dointvec,
1593 },
1594#endif
1da177e4 1595 {
1da177e4
LT
1596 .procname = "secure_tcp",
1597 .data = &sysctl_ip_vs_secure_tcp,
1598 .maxlen = sizeof(int),
1599 .mode = 0644,
6d9f239a 1600 .proc_handler = proc_do_defense_mode,
1da177e4
LT
1601 },
1602#if 0
1603 {
1da177e4
LT
1604 .procname = "timeout_established",
1605 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1606 .maxlen = sizeof(int),
1607 .mode = 0644,
6d9f239a 1608 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1609 },
1610 {
1da177e4
LT
1611 .procname = "timeout_synsent",
1612 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1613 .maxlen = sizeof(int),
1614 .mode = 0644,
6d9f239a 1615 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1616 },
1617 {
1da177e4
LT
1618 .procname = "timeout_synrecv",
1619 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1620 .maxlen = sizeof(int),
1621 .mode = 0644,
6d9f239a 1622 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1623 },
1624 {
1da177e4
LT
1625 .procname = "timeout_finwait",
1626 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1627 .maxlen = sizeof(int),
1628 .mode = 0644,
6d9f239a 1629 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1630 },
1631 {
1da177e4
LT
1632 .procname = "timeout_timewait",
1633 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1634 .maxlen = sizeof(int),
1635 .mode = 0644,
6d9f239a 1636 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1637 },
1638 {
1da177e4
LT
1639 .procname = "timeout_close",
1640 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1641 .maxlen = sizeof(int),
1642 .mode = 0644,
6d9f239a 1643 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1644 },
1645 {
1da177e4
LT
1646 .procname = "timeout_closewait",
1647 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1648 .maxlen = sizeof(int),
1649 .mode = 0644,
6d9f239a 1650 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1651 },
1652 {
1da177e4
LT
1653 .procname = "timeout_lastack",
1654 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1655 .maxlen = sizeof(int),
1656 .mode = 0644,
6d9f239a 1657 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1658 },
1659 {
1da177e4
LT
1660 .procname = "timeout_listen",
1661 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1662 .maxlen = sizeof(int),
1663 .mode = 0644,
6d9f239a 1664 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1665 },
1666 {
1da177e4
LT
1667 .procname = "timeout_synack",
1668 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1669 .maxlen = sizeof(int),
1670 .mode = 0644,
6d9f239a 1671 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1672 },
1673 {
1da177e4
LT
1674 .procname = "timeout_udp",
1675 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1676 .maxlen = sizeof(int),
1677 .mode = 0644,
6d9f239a 1678 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1679 },
1680 {
1da177e4
LT
1681 .procname = "timeout_icmp",
1682 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1683 .maxlen = sizeof(int),
1684 .mode = 0644,
6d9f239a 1685 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1686 },
1687#endif
1688 {
1da177e4
LT
1689 .procname = "cache_bypass",
1690 .data = &sysctl_ip_vs_cache_bypass,
1691 .maxlen = sizeof(int),
1692 .mode = 0644,
6d9f239a 1693 .proc_handler = proc_dointvec,
1da177e4
LT
1694 },
1695 {
1da177e4
LT
1696 .procname = "expire_nodest_conn",
1697 .data = &sysctl_ip_vs_expire_nodest_conn,
1698 .maxlen = sizeof(int),
1699 .mode = 0644,
6d9f239a 1700 .proc_handler = proc_dointvec,
1da177e4
LT
1701 },
1702 {
1da177e4
LT
1703 .procname = "expire_quiescent_template",
1704 .data = &sysctl_ip_vs_expire_quiescent_template,
1705 .maxlen = sizeof(int),
1706 .mode = 0644,
6d9f239a 1707 .proc_handler = proc_dointvec,
1da177e4
LT
1708 },
1709 {
1da177e4
LT
1710 .procname = "sync_threshold",
1711 .data = &sysctl_ip_vs_sync_threshold,
1712 .maxlen = sizeof(sysctl_ip_vs_sync_threshold),
1713 .mode = 0644,
6d9f239a 1714 .proc_handler = proc_do_sync_threshold,
1da177e4
LT
1715 },
1716 {
1da177e4
LT
1717 .procname = "nat_icmp_send",
1718 .data = &sysctl_ip_vs_nat_icmp_send,
1719 .maxlen = sizeof(int),
1720 .mode = 0644,
6d9f239a 1721 .proc_handler = proc_dointvec,
1da177e4 1722 },
f8572d8f 1723 { }
1da177e4
LT
1724};
1725
5587da55 1726const struct ctl_path net_vs_ctl_path[] = {
f8572d8f
EB
1727 { .procname = "net", },
1728 { .procname = "ipv4", },
90754f8e
PE
1729 { .procname = "vs", },
1730 { }
1da177e4 1731};
90754f8e 1732EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1da177e4
LT
1733
1734static struct ctl_table_header * sysctl_header;
1735
1736#ifdef CONFIG_PROC_FS
1737
/* Per-open iteration state for the service listing seq_file. */
struct ip_vs_iter {
	struct list_head	*table;		/* ip_vs_svc_table or ip_vs_svc_fwm_table */
	int			bucket;		/* index of the current bucket in table */
};
1742
1743/*
1744 * Write the contents of the VS rule table to a PROCfs file.
1745 * (It is kept just for backward compatibility)
1746 */
1747static inline const char *ip_vs_fwd_name(unsigned flags)
1748{
1749 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1750 case IP_VS_CONN_F_LOCALNODE:
1751 return "Local";
1752 case IP_VS_CONN_F_TUNNEL:
1753 return "Tunnel";
1754 case IP_VS_CONN_F_DROUTE:
1755 return "Route";
1756 default:
1757 return "Masq";
1758 }
1759}
1760
1761
1762/* Get the Nth entry in the two lists */
1763static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1764{
1765 struct ip_vs_iter *iter = seq->private;
1766 int idx;
1767 struct ip_vs_service *svc;
1768
1769 /* look in hash by protocol */
1770 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1771 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1772 if (pos-- == 0){
1773 iter->table = ip_vs_svc_table;
1774 iter->bucket = idx;
1775 return svc;
1776 }
1777 }
1778 }
1779
1780 /* keep looking in fwmark */
1781 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1782 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1783 if (pos-- == 0) {
1784 iter->table = ip_vs_svc_fwm_table;
1785 iter->bucket = idx;
1786 return svc;
1787 }
1788 }
1789 }
1790
1791 return NULL;
1792}
1793
1794static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
563e94f0 1795__acquires(__ip_vs_svc_lock)
1da177e4
LT
1796{
1797
1798 read_lock_bh(&__ip_vs_svc_lock);
1799 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1800}
1801
1802
1803static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1804{
1805 struct list_head *e;
1806 struct ip_vs_iter *iter;
1807 struct ip_vs_service *svc;
1808
1809 ++*pos;
1810 if (v == SEQ_START_TOKEN)
1811 return ip_vs_info_array(seq,0);
1812
1813 svc = v;
1814 iter = seq->private;
1815
1816 if (iter->table == ip_vs_svc_table) {
1817 /* next service in table hashed by protocol */
1818 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1819 return list_entry(e, struct ip_vs_service, s_list);
1820
1821
1822 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1823 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1824 s_list) {
1825 return svc;
1826 }
1827 }
1828
1829 iter->table = ip_vs_svc_fwm_table;
1830 iter->bucket = -1;
1831 goto scan_fwmark;
1832 }
1833
1834 /* next service in hashed by fwmark */
1835 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1836 return list_entry(e, struct ip_vs_service, f_list);
1837
1838 scan_fwmark:
1839 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1840 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1841 f_list)
1842 return svc;
1843 }
1844
1845 return NULL;
1846}
1847
1848static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
563e94f0 1849__releases(__ip_vs_svc_lock)
1da177e4
LT
1850{
1851 read_unlock_bh(&__ip_vs_svc_lock);
1852}
1853
1854
1855static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1856{
1857 if (v == SEQ_START_TOKEN) {
1858 seq_printf(seq,
1859 "IP Virtual Server version %d.%d.%d (size=%d)\n",
6f7edb48 1860 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1da177e4
LT
1861 seq_puts(seq,
1862 "Prot LocalAddress:Port Scheduler Flags\n");
1863 seq_puts(seq,
1864 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1865 } else {
1866 const struct ip_vs_service *svc = v;
1867 const struct ip_vs_iter *iter = seq->private;
1868 const struct ip_vs_dest *dest;
1869
667a5f18
VB
1870 if (iter->table == ip_vs_svc_table) {
1871#ifdef CONFIG_IP_VS_IPV6
1872 if (svc->af == AF_INET6)
5b095d98 1873 seq_printf(seq, "%s [%pI6]:%04X %s ",
667a5f18 1874 ip_vs_proto_name(svc->protocol),
38ff4fa4 1875 &svc->addr.in6,
667a5f18
VB
1876 ntohs(svc->port),
1877 svc->scheduler->name);
1878 else
1879#endif
26ec037f 1880 seq_printf(seq, "%s %08X:%04X %s %s ",
667a5f18
VB
1881 ip_vs_proto_name(svc->protocol),
1882 ntohl(svc->addr.ip),
1883 ntohs(svc->port),
26ec037f
NC
1884 svc->scheduler->name,
1885 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
667a5f18 1886 } else {
26ec037f
NC
1887 seq_printf(seq, "FWM %08X %s %s",
1888 svc->fwmark, svc->scheduler->name,
1889 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
667a5f18 1890 }
1da177e4
LT
1891
1892 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1893 seq_printf(seq, "persistent %d %08X\n",
1894 svc->timeout,
1895 ntohl(svc->netmask));
1896 else
1897 seq_putc(seq, '\n');
1898
1899 list_for_each_entry(dest, &svc->destinations, n_list) {
667a5f18
VB
1900#ifdef CONFIG_IP_VS_IPV6
1901 if (dest->af == AF_INET6)
1902 seq_printf(seq,
5b095d98 1903 " -> [%pI6]:%04X"
667a5f18 1904 " %-7s %-6d %-10d %-10d\n",
38ff4fa4 1905 &dest->addr.in6,
667a5f18
VB
1906 ntohs(dest->port),
1907 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1908 atomic_read(&dest->weight),
1909 atomic_read(&dest->activeconns),
1910 atomic_read(&dest->inactconns));
1911 else
1912#endif
1913 seq_printf(seq,
1914 " -> %08X:%04X "
1915 "%-7s %-6d %-10d %-10d\n",
1916 ntohl(dest->addr.ip),
1917 ntohs(dest->port),
1918 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1919 atomic_read(&dest->weight),
1920 atomic_read(&dest->activeconns),
1921 atomic_read(&dest->inactconns));
1922
1da177e4
LT
1923 }
1924 }
1925 return 0;
1926}
1927
56b3d975 1928static const struct seq_operations ip_vs_info_seq_ops = {
1da177e4
LT
1929 .start = ip_vs_info_seq_start,
1930 .next = ip_vs_info_seq_next,
1931 .stop = ip_vs_info_seq_stop,
1932 .show = ip_vs_info_seq_show,
1933};
1934
1935static int ip_vs_info_open(struct inode *inode, struct file *file)
1936{
cf7732e4
PE
1937 return seq_open_private(file, &ip_vs_info_seq_ops,
1938 sizeof(struct ip_vs_iter));
1da177e4
LT
1939}
1940
9a32144e 1941static const struct file_operations ip_vs_info_fops = {
1da177e4
LT
1942 .owner = THIS_MODULE,
1943 .open = ip_vs_info_open,
1944 .read = seq_read,
1945 .llseek = seq_lseek,
1946 .release = seq_release_private,
1947};
1948
1949#endif
1950
519e49e8
SW
1951struct ip_vs_stats ip_vs_stats = {
1952 .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
1953};
1da177e4
LT
1954
1955#ifdef CONFIG_PROC_FS
1956static int ip_vs_stats_show(struct seq_file *seq, void *v)
1957{
1958
1959/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1960 seq_puts(seq,
1961 " Total Incoming Outgoing Incoming Outgoing\n");
1962 seq_printf(seq,
1963 " Conns Packets Packets Bytes Bytes\n");
1964
1965 spin_lock_bh(&ip_vs_stats.lock);
e9c0ce23
SW
1966 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,
1967 ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,
1968 (unsigned long long) ip_vs_stats.ustats.inbytes,
1969 (unsigned long long) ip_vs_stats.ustats.outbytes);
1da177e4
LT
1970
1971/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1972 seq_puts(seq,
1973 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1974 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
e9c0ce23
SW
1975 ip_vs_stats.ustats.cps,
1976 ip_vs_stats.ustats.inpps,
1977 ip_vs_stats.ustats.outpps,
1978 ip_vs_stats.ustats.inbps,
1979 ip_vs_stats.ustats.outbps);
1da177e4
LT
1980 spin_unlock_bh(&ip_vs_stats.lock);
1981
1982 return 0;
1983}
1984
1985static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1986{
1987 return single_open(file, ip_vs_stats_show, NULL);
1988}
1989
9a32144e 1990static const struct file_operations ip_vs_stats_fops = {
1da177e4
LT
1991 .owner = THIS_MODULE,
1992 .open = ip_vs_stats_seq_open,
1993 .read = seq_read,
1994 .llseek = seq_lseek,
1995 .release = single_release,
1996};
1997
1998#endif
1999
2000/*
2001 * Set timeout values for tcp tcpfin udp in the timeout_table.
2002 */
2003static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
2004{
2005 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2006 u->tcp_timeout,
2007 u->tcp_fin_timeout,
2008 u->udp_timeout);
2009
2010#ifdef CONFIG_IP_VS_PROTO_TCP
2011 if (u->tcp_timeout) {
2012 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
2013 = u->tcp_timeout * HZ;
2014 }
2015
2016 if (u->tcp_fin_timeout) {
2017 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
2018 = u->tcp_fin_timeout * HZ;
2019 }
2020#endif
2021
2022#ifdef CONFIG_IP_VS_PROTO_UDP
2023 if (u->udp_timeout) {
2024 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
2025 = u->udp_timeout * HZ;
2026 }
2027#endif
2028 return 0;
2029}
2030
2031
/*
 * Index of a setsockopt command in set_arglen[], and the exact argument
 * length each class of command must be called with.  The macro argument
 * and the whole expansion are parenthesized so SET_CMDID() stays correct
 * when given, or used inside, a larger expression.
 */
#define SET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
#define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
				 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
#define MAX_ARG_LEN		SVCDEST_ARG_LEN
2039
9b5b5cff 2040static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
1da177e4
LT
2041 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
2042 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
2043 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
2044 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
2045 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
2046 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
2047 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
2048 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
2049 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
2050 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
2051 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
2052};
2053
c860c6b1
JV
2054static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2055 struct ip_vs_service_user *usvc_compat)
2056{
2057 usvc->af = AF_INET;
2058 usvc->protocol = usvc_compat->protocol;
2059 usvc->addr.ip = usvc_compat->addr;
2060 usvc->port = usvc_compat->port;
2061 usvc->fwmark = usvc_compat->fwmark;
2062
2063 /* Deep copy of sched_name is not needed here */
2064 usvc->sched_name = usvc_compat->sched_name;
2065
2066 usvc->flags = usvc_compat->flags;
2067 usvc->timeout = usvc_compat->timeout;
2068 usvc->netmask = usvc_compat->netmask;
2069}
2070
2071static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2072 struct ip_vs_dest_user *udest_compat)
2073{
2074 udest->addr.ip = udest_compat->addr;
2075 udest->port = udest_compat->port;
2076 udest->conn_flags = udest_compat->conn_flags;
2077 udest->weight = udest_compat->weight;
2078 udest->u_threshold = udest_compat->u_threshold;
2079 udest->l_threshold = udest_compat->l_threshold;
2080}
2081
1da177e4
LT
2082static int
2083do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2084{
2085 int ret;
2086 unsigned char arg[MAX_ARG_LEN];
c860c6b1
JV
2087 struct ip_vs_service_user *usvc_compat;
2088 struct ip_vs_service_user_kern usvc;
1da177e4 2089 struct ip_vs_service *svc;
c860c6b1
JV
2090 struct ip_vs_dest_user *udest_compat;
2091 struct ip_vs_dest_user_kern udest;
1da177e4
LT
2092
2093 if (!capable(CAP_NET_ADMIN))
2094 return -EPERM;
2095
04bcef2a
AV
2096 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2097 return -EINVAL;
2098 if (len < 0 || len > MAX_ARG_LEN)
2099 return -EINVAL;
1da177e4 2100 if (len != set_arglen[SET_CMDID(cmd)]) {
1e3e238e
HE
2101 pr_err("set_ctl: len %u != %u\n",
2102 len, set_arglen[SET_CMDID(cmd)]);
1da177e4
LT
2103 return -EINVAL;
2104 }
2105
2106 if (copy_from_user(arg, user, len) != 0)
2107 return -EFAULT;
2108
2109 /* increase the module use count */
2110 ip_vs_use_count_inc();
2111
14cc3e2b 2112 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
1da177e4
LT
2113 ret = -ERESTARTSYS;
2114 goto out_dec;
2115 }
2116
2117 if (cmd == IP_VS_SO_SET_FLUSH) {
2118 /* Flush the virtual service */
2119 ret = ip_vs_flush();
2120 goto out_unlock;
2121 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2122 /* Set timeout values for (tcp tcpfin udp) */
2123 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
2124 goto out_unlock;
2125 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2126 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2127 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
2128 goto out_unlock;
2129 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2130 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2131 ret = stop_sync_thread(dm->state);
2132 goto out_unlock;
2133 }
2134
c860c6b1
JV
2135 usvc_compat = (struct ip_vs_service_user *)arg;
2136 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2137
2138 /* We only use the new structs internally, so copy userspace compat
2139 * structs to extended internal versions */
2140 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2141 ip_vs_copy_udest_compat(&udest, udest_compat);
1da177e4
LT
2142
2143 if (cmd == IP_VS_SO_SET_ZERO) {
2144 /* if no service address is set, zero counters in all */
c860c6b1 2145 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
1da177e4
LT
2146 ret = ip_vs_zero_all();
2147 goto out_unlock;
2148 }
2149 }
2150
2906f66a
VMR
2151 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2152 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2153 usvc.protocol != IPPROTO_SCTP) {
1e3e238e
HE
2154 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2155 usvc.protocol, &usvc.addr.ip,
2156 ntohs(usvc.port), usvc.sched_name);
1da177e4
LT
2157 ret = -EFAULT;
2158 goto out_unlock;
2159 }
2160
2161 /* Lookup the exact service by <protocol, addr, port> or fwmark */
c860c6b1 2162 if (usvc.fwmark == 0)
b18610de
JV
2163 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
2164 &usvc.addr, usvc.port);
1da177e4 2165 else
b18610de 2166 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
1da177e4
LT
2167
2168 if (cmd != IP_VS_SO_SET_ADD
c860c6b1 2169 && (svc == NULL || svc->protocol != usvc.protocol)) {
1da177e4 2170 ret = -ESRCH;
b2aff963 2171 goto out_drop_service;
1da177e4
LT
2172 }
2173
2174 switch (cmd) {
2175 case IP_VS_SO_SET_ADD:
2176 if (svc != NULL)
2177 ret = -EEXIST;
2178 else
c860c6b1 2179 ret = ip_vs_add_service(&usvc, &svc);
1da177e4
LT
2180 break;
2181 case IP_VS_SO_SET_EDIT:
c860c6b1 2182 ret = ip_vs_edit_service(svc, &usvc);
1da177e4
LT
2183 break;
2184 case IP_VS_SO_SET_DEL:
2185 ret = ip_vs_del_service(svc);
2186 if (!ret)
2187 goto out_unlock;
2188 break;
2189 case IP_VS_SO_SET_ZERO:
2190 ret = ip_vs_zero_service(svc);
2191 break;
2192 case IP_VS_SO_SET_ADDDEST:
c860c6b1 2193 ret = ip_vs_add_dest(svc, &udest);
1da177e4
LT
2194 break;
2195 case IP_VS_SO_SET_EDITDEST:
c860c6b1 2196 ret = ip_vs_edit_dest(svc, &udest);
1da177e4
LT
2197 break;
2198 case IP_VS_SO_SET_DELDEST:
c860c6b1 2199 ret = ip_vs_del_dest(svc, &udest);
1da177e4
LT
2200 break;
2201 default:
2202 ret = -EINVAL;
2203 }
2204
b2aff963 2205out_drop_service:
1da177e4
LT
2206 if (svc)
2207 ip_vs_service_put(svc);
2208
2209 out_unlock:
14cc3e2b 2210 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2211 out_dec:
2212 /* decrease the module use count */
2213 ip_vs_use_count_dec();
2214
2215 return ret;
2216}
2217
2218
2219static void
2220ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2221{
2222 spin_lock_bh(&src->lock);
e9c0ce23 2223 memcpy(dst, &src->ustats, sizeof(*dst));
1da177e4
LT
2224 spin_unlock_bh(&src->lock);
2225}
2226
2227static void
2228ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2229{
2230 dst->protocol = src->protocol;
e7ade46a 2231 dst->addr = src->addr.ip;
1da177e4
LT
2232 dst->port = src->port;
2233 dst->fwmark = src->fwmark;
4da62fc7 2234 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
1da177e4
LT
2235 dst->flags = src->flags;
2236 dst->timeout = src->timeout / HZ;
2237 dst->netmask = src->netmask;
2238 dst->num_dests = src->num_dests;
2239 ip_vs_copy_stats(&dst->stats, &src->stats);
2240}
2241
2242static inline int
2243__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2244 struct ip_vs_get_services __user *uptr)
2245{
2246 int idx, count=0;
2247 struct ip_vs_service *svc;
2248 struct ip_vs_service_entry entry;
2249 int ret = 0;
2250
2251 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2252 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
f94fd041
JV
2253 /* Only expose IPv4 entries to old interface */
2254 if (svc->af != AF_INET)
2255 continue;
2256
1da177e4
LT
2257 if (count >= get->num_services)
2258 goto out;
4da62fc7 2259 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2260 ip_vs_copy_service(&entry, svc);
2261 if (copy_to_user(&uptr->entrytable[count],
2262 &entry, sizeof(entry))) {
2263 ret = -EFAULT;
2264 goto out;
2265 }
2266 count++;
2267 }
2268 }
2269
2270 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2271 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
f94fd041
JV
2272 /* Only expose IPv4 entries to old interface */
2273 if (svc->af != AF_INET)
2274 continue;
2275
1da177e4
LT
2276 if (count >= get->num_services)
2277 goto out;
4da62fc7 2278 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2279 ip_vs_copy_service(&entry, svc);
2280 if (copy_to_user(&uptr->entrytable[count],
2281 &entry, sizeof(entry))) {
2282 ret = -EFAULT;
2283 goto out;
2284 }
2285 count++;
2286 }
2287 }
2288 out:
2289 return ret;
2290}
2291
2292static inline int
2293__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2294 struct ip_vs_get_dests __user *uptr)
2295{
2296 struct ip_vs_service *svc;
b18610de 2297 union nf_inet_addr addr = { .ip = get->addr };
1da177e4
LT
2298 int ret = 0;
2299
2300 if (get->fwmark)
b18610de 2301 svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark);
1da177e4 2302 else
b18610de
JV
2303 svc = __ip_vs_service_get(AF_INET, get->protocol, &addr,
2304 get->port);
2305
1da177e4
LT
2306 if (svc) {
2307 int count = 0;
2308 struct ip_vs_dest *dest;
2309 struct ip_vs_dest_entry entry;
2310
2311 list_for_each_entry(dest, &svc->destinations, n_list) {
2312 if (count >= get->num_dests)
2313 break;
2314
e7ade46a 2315 entry.addr = dest->addr.ip;
1da177e4
LT
2316 entry.port = dest->port;
2317 entry.conn_flags = atomic_read(&dest->conn_flags);
2318 entry.weight = atomic_read(&dest->weight);
2319 entry.u_threshold = dest->u_threshold;
2320 entry.l_threshold = dest->l_threshold;
2321 entry.activeconns = atomic_read(&dest->activeconns);
2322 entry.inactconns = atomic_read(&dest->inactconns);
2323 entry.persistconns = atomic_read(&dest->persistconns);
2324 ip_vs_copy_stats(&entry.stats, &dest->stats);
2325 if (copy_to_user(&uptr->entrytable[count],
2326 &entry, sizeof(entry))) {
2327 ret = -EFAULT;
2328 break;
2329 }
2330 count++;
2331 }
2332 ip_vs_service_put(svc);
2333 } else
2334 ret = -ESRCH;
2335 return ret;
2336}
2337
2338static inline void
2339__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2340{
2341#ifdef CONFIG_IP_VS_PROTO_TCP
2342 u->tcp_timeout =
2343 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2344 u->tcp_fin_timeout =
2345 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2346#endif
2347#ifdef CONFIG_IP_VS_PROTO_UDP
2348 u->udp_timeout =
2349 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2350#endif
2351}
2352
2353
2354#define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2355#define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2356#define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2357#define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2358#define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2359#define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2360#define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2361
9b5b5cff 2362static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
1da177e4
LT
2363 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2364 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2365 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2366 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2367 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2368 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2369 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2370};
2371
2372static int
2373do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2374{
2375 unsigned char arg[128];
2376 int ret = 0;
04bcef2a 2377 unsigned int copylen;
1da177e4
LT
2378
2379 if (!capable(CAP_NET_ADMIN))
2380 return -EPERM;
2381
04bcef2a
AV
2382 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2383 return -EINVAL;
2384
1da177e4 2385 if (*len < get_arglen[GET_CMDID(cmd)]) {
1e3e238e
HE
2386 pr_err("get_ctl: len %u < %u\n",
2387 *len, get_arglen[GET_CMDID(cmd)]);
1da177e4
LT
2388 return -EINVAL;
2389 }
2390
04bcef2a
AV
2391 copylen = get_arglen[GET_CMDID(cmd)];
2392 if (copylen > 128)
2393 return -EINVAL;
2394
2395 if (copy_from_user(arg, user, copylen) != 0)
1da177e4
LT
2396 return -EFAULT;
2397
14cc3e2b 2398 if (mutex_lock_interruptible(&__ip_vs_mutex))
1da177e4
LT
2399 return -ERESTARTSYS;
2400
2401 switch (cmd) {
2402 case IP_VS_SO_GET_VERSION:
2403 {
2404 char buf[64];
2405
2406 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
6f7edb48 2407 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1da177e4
LT
2408 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2409 ret = -EFAULT;
2410 goto out;
2411 }
2412 *len = strlen(buf)+1;
2413 }
2414 break;
2415
2416 case IP_VS_SO_GET_INFO:
2417 {
2418 struct ip_vs_getinfo info;
2419 info.version = IP_VS_VERSION_CODE;
6f7edb48 2420 info.size = ip_vs_conn_tab_size;
1da177e4
LT
2421 info.num_services = ip_vs_num_services;
2422 if (copy_to_user(user, &info, sizeof(info)) != 0)
2423 ret = -EFAULT;
2424 }
2425 break;
2426
2427 case IP_VS_SO_GET_SERVICES:
2428 {
2429 struct ip_vs_get_services *get;
2430 int size;
2431
2432 get = (struct ip_vs_get_services *)arg;
2433 size = sizeof(*get) +
2434 sizeof(struct ip_vs_service_entry) * get->num_services;
2435 if (*len != size) {
1e3e238e 2436 pr_err("length: %u != %u\n", *len, size);
1da177e4
LT
2437 ret = -EINVAL;
2438 goto out;
2439 }
2440 ret = __ip_vs_get_service_entries(get, user);
2441 }
2442 break;
2443
2444 case IP_VS_SO_GET_SERVICE:
2445 {
2446 struct ip_vs_service_entry *entry;
2447 struct ip_vs_service *svc;
b18610de 2448 union nf_inet_addr addr;
1da177e4
LT
2449
2450 entry = (struct ip_vs_service_entry *)arg;
b18610de 2451 addr.ip = entry->addr;
1da177e4 2452 if (entry->fwmark)
b18610de 2453 svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark);
1da177e4 2454 else
b18610de
JV
2455 svc = __ip_vs_service_get(AF_INET, entry->protocol,
2456 &addr, entry->port);
1da177e4
LT
2457 if (svc) {
2458 ip_vs_copy_service(entry, svc);
2459 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2460 ret = -EFAULT;
2461 ip_vs_service_put(svc);
2462 } else
2463 ret = -ESRCH;
2464 }
2465 break;
2466
2467 case IP_VS_SO_GET_DESTS:
2468 {
2469 struct ip_vs_get_dests *get;
2470 int size;
2471
2472 get = (struct ip_vs_get_dests *)arg;
2473 size = sizeof(*get) +
2474 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2475 if (*len != size) {
1e3e238e 2476 pr_err("length: %u != %u\n", *len, size);
1da177e4
LT
2477 ret = -EINVAL;
2478 goto out;
2479 }
2480 ret = __ip_vs_get_dest_entries(get, user);
2481 }
2482 break;
2483
2484 case IP_VS_SO_GET_TIMEOUT:
2485 {
2486 struct ip_vs_timeout_user t;
2487
2488 __ip_vs_get_timeouts(&t);
2489 if (copy_to_user(user, &t, sizeof(t)) != 0)
2490 ret = -EFAULT;
2491 }
2492 break;
2493
2494 case IP_VS_SO_GET_DAEMON:
2495 {
2496 struct ip_vs_daemon_user d[2];
2497
2498 memset(&d, 0, sizeof(d));
2499 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2500 d[0].state = IP_VS_STATE_MASTER;
4da62fc7 2501 strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
1da177e4
LT
2502 d[0].syncid = ip_vs_master_syncid;
2503 }
2504 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2505 d[1].state = IP_VS_STATE_BACKUP;
4da62fc7 2506 strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
1da177e4
LT
2507 d[1].syncid = ip_vs_backup_syncid;
2508 }
2509 if (copy_to_user(user, &d, sizeof(d)) != 0)
2510 ret = -EFAULT;
2511 }
2512 break;
2513
2514 default:
2515 ret = -EINVAL;
2516 }
2517
2518 out:
14cc3e2b 2519 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2520 return ret;
2521}
2522
2523
2524static struct nf_sockopt_ops ip_vs_sockopts = {
2525 .pf = PF_INET,
2526 .set_optmin = IP_VS_BASE_CTL,
2527 .set_optmax = IP_VS_SO_SET_MAX+1,
2528 .set = do_ip_vs_set_ctl,
2529 .get_optmin = IP_VS_BASE_CTL,
2530 .get_optmax = IP_VS_SO_GET_MAX+1,
2531 .get = do_ip_vs_get_ctl,
16fcec35 2532 .owner = THIS_MODULE,
1da177e4
LT
2533};
2534
9a812198
JV
2535/*
2536 * Generic Netlink interface
2537 */
2538
2539/* IPVS genetlink family */
2540static struct genl_family ip_vs_genl_family = {
2541 .id = GENL_ID_GENERATE,
2542 .hdrsize = 0,
2543 .name = IPVS_GENL_NAME,
2544 .version = IPVS_GENL_VERSION,
2545 .maxattr = IPVS_CMD_MAX,
2546};
2547
2548/* Policy used for first-level command attributes */
2549static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2550 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2551 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2552 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2553 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2554 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2555 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2556};
2557
2558/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2559static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2560 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2561 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2562 .len = IP_VS_IFNAME_MAXLEN },
2563 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2564};
2565
2566/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2567static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2568 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2569 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2570 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2571 .len = sizeof(union nf_inet_addr) },
2572 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2573 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2574 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2575 .len = IP_VS_SCHEDNAME_MAXLEN },
2576 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2577 .len = sizeof(struct ip_vs_flags) },
2578 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2579 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2580 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2581};
2582
2583/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2584static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2585 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2586 .len = sizeof(union nf_inet_addr) },
2587 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2588 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2589 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2590 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2591 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2592 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2593 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2594 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2595 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
2596};
2597
2598static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2599 struct ip_vs_stats *stats)
2600{
2601 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2602 if (!nl_stats)
2603 return -EMSGSIZE;
2604
2605 spin_lock_bh(&stats->lock);
2606
e9c0ce23
SW
2607 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
2608 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
2609 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
2610 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
2611 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
2612 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
2613 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
2614 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
2615 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
2616 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
9a812198
JV
2617
2618 spin_unlock_bh(&stats->lock);
2619
2620 nla_nest_end(skb, nl_stats);
2621
2622 return 0;
2623
2624nla_put_failure:
2625 spin_unlock_bh(&stats->lock);
2626 nla_nest_cancel(skb, nl_stats);
2627 return -EMSGSIZE;
2628}
2629
2630static int ip_vs_genl_fill_service(struct sk_buff *skb,
2631 struct ip_vs_service *svc)
2632{
2633 struct nlattr *nl_service;
2634 struct ip_vs_flags flags = { .flags = svc->flags,
2635 .mask = ~0 };
2636
2637 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2638 if (!nl_service)
2639 return -EMSGSIZE;
2640
f94fd041 2641 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
9a812198
JV
2642
2643 if (svc->fwmark) {
2644 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2645 } else {
2646 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2647 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2648 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2649 }
2650
2651 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2652 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2653 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2654 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2655
2656 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2657 goto nla_put_failure;
2658
2659 nla_nest_end(skb, nl_service);
2660
2661 return 0;
2662
2663nla_put_failure:
2664 nla_nest_cancel(skb, nl_service);
2665 return -EMSGSIZE;
2666}
2667
2668static int ip_vs_genl_dump_service(struct sk_buff *skb,
2669 struct ip_vs_service *svc,
2670 struct netlink_callback *cb)
2671{
2672 void *hdr;
2673
2674 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2675 &ip_vs_genl_family, NLM_F_MULTI,
2676 IPVS_CMD_NEW_SERVICE);
2677 if (!hdr)
2678 return -EMSGSIZE;
2679
2680 if (ip_vs_genl_fill_service(skb, svc) < 0)
2681 goto nla_put_failure;
2682
2683 return genlmsg_end(skb, hdr);
2684
2685nla_put_failure:
2686 genlmsg_cancel(skb, hdr);
2687 return -EMSGSIZE;
2688}
2689
2690static int ip_vs_genl_dump_services(struct sk_buff *skb,
2691 struct netlink_callback *cb)
2692{
2693 int idx = 0, i;
2694 int start = cb->args[0];
2695 struct ip_vs_service *svc;
2696
2697 mutex_lock(&__ip_vs_mutex);
2698 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2699 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2700 if (++idx <= start)
2701 continue;
2702 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2703 idx--;
2704 goto nla_put_failure;
2705 }
2706 }
2707 }
2708
2709 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2710 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2711 if (++idx <= start)
2712 continue;
2713 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2714 idx--;
2715 goto nla_put_failure;
2716 }
2717 }
2718 }
2719
2720nla_put_failure:
2721 mutex_unlock(&__ip_vs_mutex);
2722 cb->args[0] = idx;
2723
2724 return skb->len;
2725}
2726
c860c6b1 2727static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
9a812198
JV
2728 struct nlattr *nla, int full_entry)
2729{
2730 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2731 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2732
2733 /* Parse mandatory identifying service fields first */
2734 if (nla == NULL ||
2735 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2736 return -EINVAL;
2737
2738 nla_af = attrs[IPVS_SVC_ATTR_AF];
2739 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2740 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2741 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2742 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2743
2744 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2745 return -EINVAL;
2746
258c8893
SH
2747 memset(usvc, 0, sizeof(*usvc));
2748
c860c6b1 2749 usvc->af = nla_get_u16(nla_af);
f94fd041
JV
2750#ifdef CONFIG_IP_VS_IPV6
2751 if (usvc->af != AF_INET && usvc->af != AF_INET6)
2752#else
2753 if (usvc->af != AF_INET)
2754#endif
9a812198
JV
2755 return -EAFNOSUPPORT;
2756
2757 if (nla_fwmark) {
2758 usvc->protocol = IPPROTO_TCP;
2759 usvc->fwmark = nla_get_u32(nla_fwmark);
2760 } else {
2761 usvc->protocol = nla_get_u16(nla_protocol);
2762 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2763 usvc->port = nla_get_u16(nla_port);
2764 usvc->fwmark = 0;
2765 }
2766
2767 /* If a full entry was requested, check for the additional fields */
2768 if (full_entry) {
2769 struct nlattr *nla_sched, *nla_flags, *nla_timeout,
2770 *nla_netmask;
2771 struct ip_vs_flags flags;
2772 struct ip_vs_service *svc;
2773
2774 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2775 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2776 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2777 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2778
2779 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2780 return -EINVAL;
2781
2782 nla_memcpy(&flags, nla_flags, sizeof(flags));
2783
2784 /* prefill flags from service if it already exists */
2785 if (usvc->fwmark)
b18610de 2786 svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark);
9a812198 2787 else
b18610de
JV
2788 svc = __ip_vs_service_get(usvc->af, usvc->protocol,
2789 &usvc->addr, usvc->port);
9a812198
JV
2790 if (svc) {
2791 usvc->flags = svc->flags;
2792 ip_vs_service_put(svc);
2793 } else
2794 usvc->flags = 0;
2795
2796 /* set new flags from userland */
2797 usvc->flags = (usvc->flags & ~flags.mask) |
2798 (flags.flags & flags.mask);
c860c6b1 2799 usvc->sched_name = nla_data(nla_sched);
9a812198
JV
2800 usvc->timeout = nla_get_u32(nla_timeout);
2801 usvc->netmask = nla_get_u32(nla_netmask);
2802 }
2803
2804 return 0;
2805}
2806
2807static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2808{
c860c6b1 2809 struct ip_vs_service_user_kern usvc;
9a812198
JV
2810 int ret;
2811
2812 ret = ip_vs_genl_parse_service(&usvc, nla, 0);
2813 if (ret)
2814 return ERR_PTR(ret);
2815
2816 if (usvc.fwmark)
b18610de 2817 return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
9a812198 2818 else
b18610de
JV
2819 return __ip_vs_service_get(usvc.af, usvc.protocol,
2820 &usvc.addr, usvc.port);
9a812198
JV
2821}
2822
2823static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2824{
2825 struct nlattr *nl_dest;
2826
2827 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2828 if (!nl_dest)
2829 return -EMSGSIZE;
2830
2831 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2832 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2833
2834 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2835 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2836 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2837 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2838 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2839 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2840 atomic_read(&dest->activeconns));
2841 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2842 atomic_read(&dest->inactconns));
2843 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2844 atomic_read(&dest->persistconns));
2845
2846 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2847 goto nla_put_failure;
2848
2849 nla_nest_end(skb, nl_dest);
2850
2851 return 0;
2852
2853nla_put_failure:
2854 nla_nest_cancel(skb, nl_dest);
2855 return -EMSGSIZE;
2856}
2857
2858static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2859 struct netlink_callback *cb)
2860{
2861 void *hdr;
2862
2863 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2864 &ip_vs_genl_family, NLM_F_MULTI,
2865 IPVS_CMD_NEW_DEST);
2866 if (!hdr)
2867 return -EMSGSIZE;
2868
2869 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2870 goto nla_put_failure;
2871
2872 return genlmsg_end(skb, hdr);
2873
2874nla_put_failure:
2875 genlmsg_cancel(skb, hdr);
2876 return -EMSGSIZE;
2877}
2878
2879static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2880 struct netlink_callback *cb)
2881{
2882 int idx = 0;
2883 int start = cb->args[0];
2884 struct ip_vs_service *svc;
2885 struct ip_vs_dest *dest;
2886 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2887
2888 mutex_lock(&__ip_vs_mutex);
2889
2890 /* Try to find the service for which to dump destinations */
2891 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2892 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2893 goto out_err;
2894
2895 svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
2896 if (IS_ERR(svc) || svc == NULL)
2897 goto out_err;
2898
2899 /* Dump the destinations */
2900 list_for_each_entry(dest, &svc->destinations, n_list) {
2901 if (++idx <= start)
2902 continue;
2903 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2904 idx--;
2905 goto nla_put_failure;
2906 }
2907 }
2908
2909nla_put_failure:
2910 cb->args[0] = idx;
2911 ip_vs_service_put(svc);
2912
2913out_err:
2914 mutex_unlock(&__ip_vs_mutex);
2915
2916 return skb->len;
2917}
2918
c860c6b1 2919static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
9a812198
JV
2920 struct nlattr *nla, int full_entry)
2921{
2922 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2923 struct nlattr *nla_addr, *nla_port;
2924
2925 /* Parse mandatory identifying destination fields first */
2926 if (nla == NULL ||
2927 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2928 return -EINVAL;
2929
2930 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
2931 nla_port = attrs[IPVS_DEST_ATTR_PORT];
2932
2933 if (!(nla_addr && nla_port))
2934 return -EINVAL;
2935
258c8893
SH
2936 memset(udest, 0, sizeof(*udest));
2937
9a812198
JV
2938 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2939 udest->port = nla_get_u16(nla_port);
2940
2941 /* If a full entry was requested, check for the additional fields */
2942 if (full_entry) {
2943 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2944 *nla_l_thresh;
2945
2946 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
2947 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
2948 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
2949 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
2950
2951 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
2952 return -EINVAL;
2953
2954 udest->conn_flags = nla_get_u32(nla_fwd)
2955 & IP_VS_CONN_F_FWD_MASK;
2956 udest->weight = nla_get_u32(nla_weight);
2957 udest->u_threshold = nla_get_u32(nla_u_thresh);
2958 udest->l_threshold = nla_get_u32(nla_l_thresh);
2959 }
2960
2961 return 0;
2962}
2963
2964static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
2965 const char *mcast_ifn, __be32 syncid)
2966{
2967 struct nlattr *nl_daemon;
2968
2969 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
2970 if (!nl_daemon)
2971 return -EMSGSIZE;
2972
2973 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
2974 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
2975 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
2976
2977 nla_nest_end(skb, nl_daemon);
2978
2979 return 0;
2980
2981nla_put_failure:
2982 nla_nest_cancel(skb, nl_daemon);
2983 return -EMSGSIZE;
2984}
2985
2986static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
2987 const char *mcast_ifn, __be32 syncid,
2988 struct netlink_callback *cb)
2989{
2990 void *hdr;
2991 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2992 &ip_vs_genl_family, NLM_F_MULTI,
2993 IPVS_CMD_NEW_DAEMON);
2994 if (!hdr)
2995 return -EMSGSIZE;
2996
2997 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
2998 goto nla_put_failure;
2999
3000 return genlmsg_end(skb, hdr);
3001
3002nla_put_failure:
3003 genlmsg_cancel(skb, hdr);
3004 return -EMSGSIZE;
3005}
3006
3007static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3008 struct netlink_callback *cb)
3009{
3010 mutex_lock(&__ip_vs_mutex);
3011 if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
3012 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
3013 ip_vs_master_mcast_ifn,
3014 ip_vs_master_syncid, cb) < 0)
3015 goto nla_put_failure;
3016
3017 cb->args[0] = 1;
3018 }
3019
3020 if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3021 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3022 ip_vs_backup_mcast_ifn,
3023 ip_vs_backup_syncid, cb) < 0)
3024 goto nla_put_failure;
3025
3026 cb->args[1] = 1;
3027 }
3028
3029nla_put_failure:
3030 mutex_unlock(&__ip_vs_mutex);
3031
3032 return skb->len;
3033}
3034
3035static int ip_vs_genl_new_daemon(struct nlattr **attrs)
3036{
3037 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3038 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3039 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3040 return -EINVAL;
3041
3042 return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3043 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3044 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3045}
3046
3047static int ip_vs_genl_del_daemon(struct nlattr **attrs)
3048{
3049 if (!attrs[IPVS_DAEMON_ATTR_STATE])
3050 return -EINVAL;
3051
3052 return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3053}
3054
3055static int ip_vs_genl_set_config(struct nlattr **attrs)
3056{
3057 struct ip_vs_timeout_user t;
3058
3059 __ip_vs_get_timeouts(&t);
3060
3061 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3062 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3063
3064 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3065 t.tcp_fin_timeout =
3066 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3067
3068 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3069 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3070
3071 return ip_vs_set_timeout(&t);
3072}
3073
3074static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3075{
3076 struct ip_vs_service *svc = NULL;
c860c6b1
JV
3077 struct ip_vs_service_user_kern usvc;
3078 struct ip_vs_dest_user_kern udest;
9a812198
JV
3079 int ret = 0, cmd;
3080 int need_full_svc = 0, need_full_dest = 0;
3081
3082 cmd = info->genlhdr->cmd;
3083
3084 mutex_lock(&__ip_vs_mutex);
3085
3086 if (cmd == IPVS_CMD_FLUSH) {
3087 ret = ip_vs_flush();
3088 goto out;
3089 } else if (cmd == IPVS_CMD_SET_CONFIG) {
3090 ret = ip_vs_genl_set_config(info->attrs);
3091 goto out;
3092 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3093 cmd == IPVS_CMD_DEL_DAEMON) {
3094
3095 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3096
3097 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3098 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3099 info->attrs[IPVS_CMD_ATTR_DAEMON],
3100 ip_vs_daemon_policy)) {
3101 ret = -EINVAL;
3102 goto out;
3103 }
3104
3105 if (cmd == IPVS_CMD_NEW_DAEMON)
3106 ret = ip_vs_genl_new_daemon(daemon_attrs);
3107 else
3108 ret = ip_vs_genl_del_daemon(daemon_attrs);
3109 goto out;
3110 } else if (cmd == IPVS_CMD_ZERO &&
3111 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3112 ret = ip_vs_zero_all();
3113 goto out;
3114 }
3115
3116 /* All following commands require a service argument, so check if we
3117 * received a valid one. We need a full service specification when
3118 * adding / editing a service. Only identifying members otherwise. */
3119 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3120 need_full_svc = 1;
3121
3122 ret = ip_vs_genl_parse_service(&usvc,
3123 info->attrs[IPVS_CMD_ATTR_SERVICE],
3124 need_full_svc);
3125 if (ret)
3126 goto out;
3127
3128 /* Lookup the exact service by <protocol, addr, port> or fwmark */
3129 if (usvc.fwmark == 0)
b18610de
JV
3130 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
3131 &usvc.addr, usvc.port);
9a812198 3132 else
b18610de 3133 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
9a812198
JV
3134
3135 /* Unless we're adding a new service, the service must already exist */
3136 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3137 ret = -ESRCH;
3138 goto out;
3139 }
3140
3141 /* Destination commands require a valid destination argument. For
3142 * adding / editing a destination, we need a full destination
3143 * specification. */
3144 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3145 cmd == IPVS_CMD_DEL_DEST) {
3146 if (cmd != IPVS_CMD_DEL_DEST)
3147 need_full_dest = 1;
3148
3149 ret = ip_vs_genl_parse_dest(&udest,
3150 info->attrs[IPVS_CMD_ATTR_DEST],
3151 need_full_dest);
3152 if (ret)
3153 goto out;
3154 }
3155
3156 switch (cmd) {
3157 case IPVS_CMD_NEW_SERVICE:
3158 if (svc == NULL)
3159 ret = ip_vs_add_service(&usvc, &svc);
3160 else
3161 ret = -EEXIST;
3162 break;
3163 case IPVS_CMD_SET_SERVICE:
3164 ret = ip_vs_edit_service(svc, &usvc);
3165 break;
3166 case IPVS_CMD_DEL_SERVICE:
3167 ret = ip_vs_del_service(svc);
3168 break;
3169 case IPVS_CMD_NEW_DEST:
3170 ret = ip_vs_add_dest(svc, &udest);
3171 break;
3172 case IPVS_CMD_SET_DEST:
3173 ret = ip_vs_edit_dest(svc, &udest);
3174 break;
3175 case IPVS_CMD_DEL_DEST:
3176 ret = ip_vs_del_dest(svc, &udest);
3177 break;
3178 case IPVS_CMD_ZERO:
3179 ret = ip_vs_zero_service(svc);
3180 break;
3181 default:
3182 ret = -EINVAL;
3183 }
3184
3185out:
3186 if (svc)
3187 ip_vs_service_put(svc);
3188 mutex_unlock(&__ip_vs_mutex);
3189
3190 return ret;
3191}
3192
3193static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3194{
3195 struct sk_buff *msg;
3196 void *reply;
3197 int ret, cmd, reply_cmd;
3198
3199 cmd = info->genlhdr->cmd;
3200
3201 if (cmd == IPVS_CMD_GET_SERVICE)
3202 reply_cmd = IPVS_CMD_NEW_SERVICE;
3203 else if (cmd == IPVS_CMD_GET_INFO)
3204 reply_cmd = IPVS_CMD_SET_INFO;
3205 else if (cmd == IPVS_CMD_GET_CONFIG)
3206 reply_cmd = IPVS_CMD_SET_CONFIG;
3207 else {
1e3e238e 3208 pr_err("unknown Generic Netlink command\n");
9a812198
JV
3209 return -EINVAL;
3210 }
3211
3212 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3213 if (!msg)
3214 return -ENOMEM;
3215
3216 mutex_lock(&__ip_vs_mutex);
3217
3218 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3219 if (reply == NULL)
3220 goto nla_put_failure;
3221
3222 switch (cmd) {
3223 case IPVS_CMD_GET_SERVICE:
3224 {
3225 struct ip_vs_service *svc;
3226
3227 svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
3228 if (IS_ERR(svc)) {
3229 ret = PTR_ERR(svc);
3230 goto out_err;
3231 } else if (svc) {
3232 ret = ip_vs_genl_fill_service(msg, svc);
3233 ip_vs_service_put(svc);
3234 if (ret)
3235 goto nla_put_failure;
3236 } else {
3237 ret = -ESRCH;
3238 goto out_err;
3239 }
3240
3241 break;
3242 }
3243
3244 case IPVS_CMD_GET_CONFIG:
3245 {
3246 struct ip_vs_timeout_user t;
3247
3248 __ip_vs_get_timeouts(&t);
3249#ifdef CONFIG_IP_VS_PROTO_TCP
3250 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3251 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3252 t.tcp_fin_timeout);
3253#endif
3254#ifdef CONFIG_IP_VS_PROTO_UDP
3255 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3256#endif
3257
3258 break;
3259 }
3260
3261 case IPVS_CMD_GET_INFO:
3262 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3263 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
6f7edb48 3264 ip_vs_conn_tab_size);
9a812198
JV
3265 break;
3266 }
3267
3268 genlmsg_end(msg, reply);
134e6375 3269 ret = genlmsg_reply(msg, info);
9a812198
JV
3270 goto out;
3271
3272nla_put_failure:
1e3e238e 3273 pr_err("not enough space in Netlink message\n");
9a812198
JV
3274 ret = -EMSGSIZE;
3275
3276out_err:
3277 nlmsg_free(msg);
3278out:
3279 mutex_unlock(&__ip_vs_mutex);
3280
3281 return ret;
3282}
3283
3284
3285static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3286 {
3287 .cmd = IPVS_CMD_NEW_SERVICE,
3288 .flags = GENL_ADMIN_PERM,
3289 .policy = ip_vs_cmd_policy,
3290 .doit = ip_vs_genl_set_cmd,
3291 },
3292 {
3293 .cmd = IPVS_CMD_SET_SERVICE,
3294 .flags = GENL_ADMIN_PERM,
3295 .policy = ip_vs_cmd_policy,
3296 .doit = ip_vs_genl_set_cmd,
3297 },
3298 {
3299 .cmd = IPVS_CMD_DEL_SERVICE,
3300 .flags = GENL_ADMIN_PERM,
3301 .policy = ip_vs_cmd_policy,
3302 .doit = ip_vs_genl_set_cmd,
3303 },
3304 {
3305 .cmd = IPVS_CMD_GET_SERVICE,
3306 .flags = GENL_ADMIN_PERM,
3307 .doit = ip_vs_genl_get_cmd,
3308 .dumpit = ip_vs_genl_dump_services,
3309 .policy = ip_vs_cmd_policy,
3310 },
3311 {
3312 .cmd = IPVS_CMD_NEW_DEST,
3313 .flags = GENL_ADMIN_PERM,
3314 .policy = ip_vs_cmd_policy,
3315 .doit = ip_vs_genl_set_cmd,
3316 },
3317 {
3318 .cmd = IPVS_CMD_SET_DEST,
3319 .flags = GENL_ADMIN_PERM,
3320 .policy = ip_vs_cmd_policy,
3321 .doit = ip_vs_genl_set_cmd,
3322 },
3323 {
3324 .cmd = IPVS_CMD_DEL_DEST,
3325 .flags = GENL_ADMIN_PERM,
3326 .policy = ip_vs_cmd_policy,
3327 .doit = ip_vs_genl_set_cmd,
3328 },
3329 {
3330 .cmd = IPVS_CMD_GET_DEST,
3331 .flags = GENL_ADMIN_PERM,
3332 .policy = ip_vs_cmd_policy,
3333 .dumpit = ip_vs_genl_dump_dests,
3334 },
3335 {
3336 .cmd = IPVS_CMD_NEW_DAEMON,
3337 .flags = GENL_ADMIN_PERM,
3338 .policy = ip_vs_cmd_policy,
3339 .doit = ip_vs_genl_set_cmd,
3340 },
3341 {
3342 .cmd = IPVS_CMD_DEL_DAEMON,
3343 .flags = GENL_ADMIN_PERM,
3344 .policy = ip_vs_cmd_policy,
3345 .doit = ip_vs_genl_set_cmd,
3346 },
3347 {
3348 .cmd = IPVS_CMD_GET_DAEMON,
3349 .flags = GENL_ADMIN_PERM,
3350 .dumpit = ip_vs_genl_dump_daemons,
3351 },
3352 {
3353 .cmd = IPVS_CMD_SET_CONFIG,
3354 .flags = GENL_ADMIN_PERM,
3355 .policy = ip_vs_cmd_policy,
3356 .doit = ip_vs_genl_set_cmd,
3357 },
3358 {
3359 .cmd = IPVS_CMD_GET_CONFIG,
3360 .flags = GENL_ADMIN_PERM,
3361 .doit = ip_vs_genl_get_cmd,
3362 },
3363 {
3364 .cmd = IPVS_CMD_GET_INFO,
3365 .flags = GENL_ADMIN_PERM,
3366 .doit = ip_vs_genl_get_cmd,
3367 },
3368 {
3369 .cmd = IPVS_CMD_ZERO,
3370 .flags = GENL_ADMIN_PERM,
3371 .policy = ip_vs_cmd_policy,
3372 .doit = ip_vs_genl_set_cmd,
3373 },
3374 {
3375 .cmd = IPVS_CMD_FLUSH,
3376 .flags = GENL_ADMIN_PERM,
3377 .doit = ip_vs_genl_set_cmd,
3378 },
3379};
3380
3381static int __init ip_vs_genl_register(void)
3382{
8f698d54
MM
3383 return genl_register_family_with_ops(&ip_vs_genl_family,
3384 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
9a812198
JV
3385}
3386
3387static void ip_vs_genl_unregister(void)
3388{
3389 genl_unregister_family(&ip_vs_genl_family);
3390}
3391
3392/* End of Generic Netlink interface definitions */
3393
1da177e4 3394
048cf48b 3395int __init ip_vs_control_init(void)
1da177e4
LT
3396{
3397 int ret;
3398 int idx;
3399
3400 EnterFunction(2);
3401
3402 ret = nf_register_sockopt(&ip_vs_sockopts);
3403 if (ret) {
1e3e238e 3404 pr_err("cannot register sockopt.\n");
1da177e4
LT
3405 return ret;
3406 }
3407
9a812198
JV
3408 ret = ip_vs_genl_register();
3409 if (ret) {
1e3e238e 3410 pr_err("cannot register Generic Netlink interface.\n");
9a812198
JV
3411 nf_unregister_sockopt(&ip_vs_sockopts);
3412 return ret;
3413 }
3414
457c4cbc
EB
3415 proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
3416 proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
1da177e4 3417
90754f8e 3418 sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
1da177e4
LT
3419
3420 /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3421 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3422 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3423 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3424 }
3425 for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) {
3426 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3427 }
3428
1da177e4
LT
3429 ip_vs_new_estimator(&ip_vs_stats);
3430
3431 /* Hook the defense timer */
3432 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3433
3434 LeaveFunction(2);
3435 return 0;
3436}
3437
3438
3439void ip_vs_control_cleanup(void)
3440{
3441 EnterFunction(2);
3442 ip_vs_trash_cleanup();
3443 cancel_rearming_delayed_work(&defense_work);
28e53bdd 3444 cancel_work_sync(&defense_work.work);
1da177e4
LT
3445 ip_vs_kill_estimator(&ip_vs_stats);
3446 unregister_sysctl_table(sysctl_header);
457c4cbc
EB
3447 proc_net_remove(&init_net, "ip_vs_stats");
3448 proc_net_remove(&init_net, "ip_vs");
9a812198 3449 ip_vs_genl_unregister();
1da177e4
LT
3450 nf_unregister_sockopt(&ip_vs_sockopts);
3451 LeaveFunction(2);
3452}