]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/netfilter/ipvs/ip_vs_ctl.c
netfilter: save the hash of the tuple in the original direction for latter use
[net-next-2.6.git] / net / netfilter / ipvs / ip_vs_ctl.c
CommitLineData
1da177e4
LT
1/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
1da177e4
LT
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
9aada7ac
HE
21#define KMSG_COMPONENT "IPVS"
22#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
1da177e4
LT
24#include <linux/module.h>
25#include <linux/init.h>
26#include <linux/types.h>
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/fs.h>
29#include <linux/sysctl.h>
30#include <linux/proc_fs.h>
31#include <linux/workqueue.h>
32#include <linux/swap.h>
1da177e4 33#include <linux/seq_file.h>
5a0e3ad6 34#include <linux/slab.h>
1da177e4
LT
35
36#include <linux/netfilter.h>
37#include <linux/netfilter_ipv4.h>
14cc3e2b 38#include <linux/mutex.h>
1da177e4 39
457c4cbc 40#include <net/net_namespace.h>
1da177e4 41#include <net/ip.h>
09571c7a
VB
42#ifdef CONFIG_IP_VS_IPV6
43#include <net/ipv6.h>
44#include <net/ip6_route.h>
45#endif
14c85021 46#include <net/route.h>
1da177e4 47#include <net/sock.h>
9a812198 48#include <net/genetlink.h>
1da177e4
LT
49
50#include <asm/uaccess.h>
51
52#include <net/ip_vs.h>
53
54/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
14cc3e2b 55static DEFINE_MUTEX(__ip_vs_mutex);
1da177e4
LT
56
57/* lock for service table */
58static DEFINE_RWLOCK(__ip_vs_svc_lock);
59
60/* lock for table with the real services */
61static DEFINE_RWLOCK(__ip_vs_rs_lock);
62
63/* lock for state and timeout tables */
4f72816e 64static DEFINE_SPINLOCK(ip_vs_securetcp_lock);
1da177e4
LT
65
66/* lock for drop entry handling */
67static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
68
69/* lock for drop packet handling */
70static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
71
72/* 1/rate drop and drop-entry variables */
73int ip_vs_drop_rate = 0;
74int ip_vs_drop_counter = 0;
75static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
76
77/* number of virtual services */
78static int ip_vs_num_services = 0;
79
80/* sysctl variables */
81static int sysctl_ip_vs_drop_entry = 0;
82static int sysctl_ip_vs_drop_packet = 0;
83static int sysctl_ip_vs_secure_tcp = 0;
84static int sysctl_ip_vs_amemthresh = 1024;
85static int sysctl_ip_vs_am_droprate = 10;
86int sysctl_ip_vs_cache_bypass = 0;
87int sysctl_ip_vs_expire_nodest_conn = 0;
88int sysctl_ip_vs_expire_quiescent_template = 0;
89int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
90int sysctl_ip_vs_nat_icmp_send = 0;
f4bc17cd
JA
91#ifdef CONFIG_IP_VS_NFCT
92int sysctl_ip_vs_conntrack;
93#endif
8a803040 94int sysctl_ip_vs_snat_reroute = 1;
1da177e4
LT
95
96
97#ifdef CONFIG_IP_VS_DEBUG
/* debug level reported by ip_vs_get_debug_level() */
static int sysctl_ip_vs_debug_level;

/*
 *	Return the currently configured IPVS debug level.
 */
int ip_vs_get_debug_level(void)
{
	int level = sysctl_ip_vs_debug_level;

	return level;
}
104#endif
105
09571c7a
VB
106#ifdef CONFIG_IP_VS_IPV6
107/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
108static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
109{
110 struct rt6_info *rt;
111 struct flowi fl = {
112 .oif = 0,
113 .nl_u = {
114 .ip6_u = {
115 .daddr = *addr,
116 .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
117 };
118
119 rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
120 if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
121 return 1;
122
123 return 0;
124}
125#endif
1da177e4 126/*
af9debd4
JA
127 * update_defense_level is called from keventd and from sysctl,
128 * so it needs to protect itself from softirqs
1da177e4
LT
129 */
130static void update_defense_level(void)
131{
132 struct sysinfo i;
133 static int old_secure_tcp = 0;
134 int availmem;
135 int nomem;
136 int to_change = -1;
137
138 /* we only count free and buffered memory (in pages) */
139 si_meminfo(&i);
140 availmem = i.freeram + i.bufferram;
141 /* however in linux 2.5 the i.bufferram is total page cache size,
142 we need adjust it */
143 /* si_swapinfo(&i); */
144 /* availmem = availmem - (i.totalswap - i.freeswap); */
145
146 nomem = (availmem < sysctl_ip_vs_amemthresh);
147
af9debd4
JA
148 local_bh_disable();
149
1da177e4
LT
150 /* drop_entry */
151 spin_lock(&__ip_vs_dropentry_lock);
152 switch (sysctl_ip_vs_drop_entry) {
153 case 0:
154 atomic_set(&ip_vs_dropentry, 0);
155 break;
156 case 1:
157 if (nomem) {
158 atomic_set(&ip_vs_dropentry, 1);
159 sysctl_ip_vs_drop_entry = 2;
160 } else {
161 atomic_set(&ip_vs_dropentry, 0);
162 }
163 break;
164 case 2:
165 if (nomem) {
166 atomic_set(&ip_vs_dropentry, 1);
167 } else {
168 atomic_set(&ip_vs_dropentry, 0);
169 sysctl_ip_vs_drop_entry = 1;
170 };
171 break;
172 case 3:
173 atomic_set(&ip_vs_dropentry, 1);
174 break;
175 }
176 spin_unlock(&__ip_vs_dropentry_lock);
177
178 /* drop_packet */
179 spin_lock(&__ip_vs_droppacket_lock);
180 switch (sysctl_ip_vs_drop_packet) {
181 case 0:
182 ip_vs_drop_rate = 0;
183 break;
184 case 1:
185 if (nomem) {
186 ip_vs_drop_rate = ip_vs_drop_counter
187 = sysctl_ip_vs_amemthresh /
188 (sysctl_ip_vs_amemthresh-availmem);
189 sysctl_ip_vs_drop_packet = 2;
190 } else {
191 ip_vs_drop_rate = 0;
192 }
193 break;
194 case 2:
195 if (nomem) {
196 ip_vs_drop_rate = ip_vs_drop_counter
197 = sysctl_ip_vs_amemthresh /
198 (sysctl_ip_vs_amemthresh-availmem);
199 } else {
200 ip_vs_drop_rate = 0;
201 sysctl_ip_vs_drop_packet = 1;
202 }
203 break;
204 case 3:
205 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
206 break;
207 }
208 spin_unlock(&__ip_vs_droppacket_lock);
209
210 /* secure_tcp */
4f72816e 211 spin_lock(&ip_vs_securetcp_lock);
1da177e4
LT
212 switch (sysctl_ip_vs_secure_tcp) {
213 case 0:
214 if (old_secure_tcp >= 2)
215 to_change = 0;
216 break;
217 case 1:
218 if (nomem) {
219 if (old_secure_tcp < 2)
220 to_change = 1;
221 sysctl_ip_vs_secure_tcp = 2;
222 } else {
223 if (old_secure_tcp >= 2)
224 to_change = 0;
225 }
226 break;
227 case 2:
228 if (nomem) {
229 if (old_secure_tcp < 2)
230 to_change = 1;
231 } else {
232 if (old_secure_tcp >= 2)
233 to_change = 0;
234 sysctl_ip_vs_secure_tcp = 1;
235 }
236 break;
237 case 3:
238 if (old_secure_tcp < 2)
239 to_change = 1;
240 break;
241 }
242 old_secure_tcp = sysctl_ip_vs_secure_tcp;
243 if (to_change >= 0)
244 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
4f72816e 245 spin_unlock(&ip_vs_securetcp_lock);
af9debd4
JA
246
247 local_bh_enable();
1da177e4
LT
248}
249
250
251/*
252 * Timer for checking the defense
253 */
254#define DEFENSE_TIMER_PERIOD 1*HZ
c4028958
DH
255static void defense_work_handler(struct work_struct *work);
256static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
1da177e4 257
c4028958 258static void defense_work_handler(struct work_struct *work)
1da177e4
LT
259{
260 update_defense_level();
261 if (atomic_read(&ip_vs_dropentry))
262 ip_vs_random_dropentry();
263
264 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
265}
266
267int
268ip_vs_use_count_inc(void)
269{
270 return try_module_get(THIS_MODULE);
271}
272
273void
274ip_vs_use_count_dec(void)
275{
276 module_put(THIS_MODULE);
277}
278
279
280/*
281 * Hash table: for virtual service lookups
282 */
283#define IP_VS_SVC_TAB_BITS 8
284#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
285#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
286
287/* the service table hashed by <protocol, addr, port> */
288static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
289/* the service table hashed by fwmark */
290static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
291
292/*
293 * Hash table: for real service lookups
294 */
295#define IP_VS_RTAB_BITS 4
296#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
297#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
298
299static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
300
301/*
302 * Trash for destinations
303 */
304static LIST_HEAD(ip_vs_dest_trash);
305
306/*
307 * FTP & NULL virtual service counters
308 */
309static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
310static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
311
312
313/*
314 * Returns hash value for virtual service
315 */
316static __inline__ unsigned
b18610de
JV
317ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
318 __be16 port)
1da177e4
LT
319{
320 register unsigned porth = ntohs(port);
b18610de 321 __be32 addr_fold = addr->ip;
1da177e4 322
b18610de
JV
323#ifdef CONFIG_IP_VS_IPV6
324 if (af == AF_INET6)
325 addr_fold = addr->ip6[0]^addr->ip6[1]^
326 addr->ip6[2]^addr->ip6[3];
327#endif
328
329 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
1da177e4
LT
330 & IP_VS_SVC_TAB_MASK;
331}
332
333/*
334 * Returns hash value of fwmark for virtual service lookup
335 */
336static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
337{
338 return fwmark & IP_VS_SVC_TAB_MASK;
339}
340
341/*
342 * Hashes a service in the ip_vs_svc_table by <proto,addr,port>
343 * or in the ip_vs_svc_fwm_table by fwmark.
344 * Should be called with locked tables.
345 */
346static int ip_vs_svc_hash(struct ip_vs_service *svc)
347{
348 unsigned hash;
349
350 if (svc->flags & IP_VS_SVC_F_HASHED) {
1e3e238e
HE
351 pr_err("%s(): request for already hashed, called from %pF\n",
352 __func__, __builtin_return_address(0));
1da177e4
LT
353 return 0;
354 }
355
356 if (svc->fwmark == 0) {
357 /*
358 * Hash it by <protocol,addr,port> in ip_vs_svc_table
359 */
b18610de 360 hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
e7ade46a 361 svc->port);
1da177e4
LT
362 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
363 } else {
364 /*
365 * Hash it by fwmark in ip_vs_svc_fwm_table
366 */
367 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
368 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
369 }
370
371 svc->flags |= IP_VS_SVC_F_HASHED;
372 /* increase its refcnt because it is referenced by the svc table */
373 atomic_inc(&svc->refcnt);
374 return 1;
375}
376
377
378/*
379 * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
380 * Should be called with locked tables.
381 */
382static int ip_vs_svc_unhash(struct ip_vs_service *svc)
383{
384 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
1e3e238e
HE
385 pr_err("%s(): request for unhash flagged, called from %pF\n",
386 __func__, __builtin_return_address(0));
1da177e4
LT
387 return 0;
388 }
389
390 if (svc->fwmark == 0) {
391 /* Remove it from the ip_vs_svc_table table */
392 list_del(&svc->s_list);
393 } else {
394 /* Remove it from the ip_vs_svc_fwm_table table */
395 list_del(&svc->f_list);
396 }
397
398 svc->flags &= ~IP_VS_SVC_F_HASHED;
399 atomic_dec(&svc->refcnt);
400 return 1;
401}
402
403
404/*
405 * Get service by {proto,addr,port} in the service table.
406 */
b18610de
JV
407static inline struct ip_vs_service *
408__ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
409 __be16 vport)
1da177e4
LT
410{
411 unsigned hash;
412 struct ip_vs_service *svc;
413
414 /* Check for "full" addressed entries */
b18610de 415 hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
1da177e4
LT
416
417 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
b18610de
JV
418 if ((svc->af == af)
419 && ip_vs_addr_equal(af, &svc->addr, vaddr)
1da177e4
LT
420 && (svc->port == vport)
421 && (svc->protocol == protocol)) {
422 /* HIT */
423 atomic_inc(&svc->usecnt);
424 return svc;
425 }
426 }
427
428 return NULL;
429}
430
431
432/*
433 * Get service by {fwmark} in the service table.
434 */
b18610de
JV
435static inline struct ip_vs_service *
436__ip_vs_svc_fwm_get(int af, __u32 fwmark)
1da177e4
LT
437{
438 unsigned hash;
439 struct ip_vs_service *svc;
440
441 /* Check for fwmark addressed entries */
442 hash = ip_vs_svc_fwm_hashkey(fwmark);
443
444 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
b18610de 445 if (svc->fwmark == fwmark && svc->af == af) {
1da177e4
LT
446 /* HIT */
447 atomic_inc(&svc->usecnt);
448 return svc;
449 }
450 }
451
452 return NULL;
453}
454
455struct ip_vs_service *
3c2e0505
JV
456ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
457 const union nf_inet_addr *vaddr, __be16 vport)
1da177e4
LT
458{
459 struct ip_vs_service *svc;
3c2e0505 460
1da177e4
LT
461 read_lock(&__ip_vs_svc_lock);
462
463 /*
464 * Check the table hashed by fwmark first
465 */
3c2e0505 466 if (fwmark && (svc = __ip_vs_svc_fwm_get(af, fwmark)))
1da177e4
LT
467 goto out;
468
469 /*
470 * Check the table hashed by <protocol,addr,port>
471 * for "full" addressed entries
472 */
3c2e0505 473 svc = __ip_vs_service_get(af, protocol, vaddr, vport);
1da177e4
LT
474
475 if (svc == NULL
476 && protocol == IPPROTO_TCP
477 && atomic_read(&ip_vs_ftpsvc_counter)
478 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
479 /*
480 * Check if ftp service entry exists, the packet
481 * might belong to FTP data connections.
482 */
3c2e0505 483 svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT);
1da177e4
LT
484 }
485
486 if (svc == NULL
487 && atomic_read(&ip_vs_nullsvc_counter)) {
488 /*
489 * Check if the catch-all port (port zero) exists
490 */
3c2e0505 491 svc = __ip_vs_service_get(af, protocol, vaddr, 0);
1da177e4
LT
492 }
493
494 out:
495 read_unlock(&__ip_vs_svc_lock);
496
3c2e0505
JV
497 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
498 fwmark, ip_vs_proto_name(protocol),
499 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
500 svc ? "hit" : "not hit");
1da177e4
LT
501
502 return svc;
503}
504
505
506static inline void
507__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
508{
509 atomic_inc(&svc->refcnt);
510 dest->svc = svc;
511}
512
513static inline void
514__ip_vs_unbind_svc(struct ip_vs_dest *dest)
515{
516 struct ip_vs_service *svc = dest->svc;
517
518 dest->svc = NULL;
519 if (atomic_dec_and_test(&svc->refcnt))
520 kfree(svc);
521}
522
523
524/*
525 * Returns hash value for real service
526 */
7937df15
JV
527static inline unsigned ip_vs_rs_hashkey(int af,
528 const union nf_inet_addr *addr,
529 __be16 port)
1da177e4
LT
530{
531 register unsigned porth = ntohs(port);
7937df15
JV
532 __be32 addr_fold = addr->ip;
533
534#ifdef CONFIG_IP_VS_IPV6
535 if (af == AF_INET6)
536 addr_fold = addr->ip6[0]^addr->ip6[1]^
537 addr->ip6[2]^addr->ip6[3];
538#endif
1da177e4 539
7937df15 540 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
1da177e4
LT
541 & IP_VS_RTAB_MASK;
542}
543
544/*
545 * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
546 * should be called with locked tables.
547 */
548static int ip_vs_rs_hash(struct ip_vs_dest *dest)
549{
550 unsigned hash;
551
552 if (!list_empty(&dest->d_list)) {
553 return 0;
554 }
555
556 /*
557 * Hash by proto,addr,port,
558 * which are the parameters of the real service.
559 */
7937df15
JV
560 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
561
1da177e4
LT
562 list_add(&dest->d_list, &ip_vs_rtable[hash]);
563
564 return 1;
565}
566
567/*
568 * UNhashes ip_vs_dest from ip_vs_rtable.
569 * should be called with locked tables.
570 */
571static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
572{
573 /*
574 * Remove it from the ip_vs_rtable table.
575 */
576 if (!list_empty(&dest->d_list)) {
577 list_del(&dest->d_list);
578 INIT_LIST_HEAD(&dest->d_list);
579 }
580
581 return 1;
582}
583
584/*
585 * Lookup real service by <proto,addr,port> in the real service table.
586 */
587struct ip_vs_dest *
7937df15
JV
588ip_vs_lookup_real_service(int af, __u16 protocol,
589 const union nf_inet_addr *daddr,
590 __be16 dport)
1da177e4
LT
591{
592 unsigned hash;
593 struct ip_vs_dest *dest;
594
595 /*
596 * Check for "full" addressed entries
597 * Return the first found entry
598 */
7937df15 599 hash = ip_vs_rs_hashkey(af, daddr, dport);
1da177e4
LT
600
601 read_lock(&__ip_vs_rs_lock);
602 list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
7937df15
JV
603 if ((dest->af == af)
604 && ip_vs_addr_equal(af, &dest->addr, daddr)
1da177e4
LT
605 && (dest->port == dport)
606 && ((dest->protocol == protocol) ||
607 dest->vfwmark)) {
608 /* HIT */
609 read_unlock(&__ip_vs_rs_lock);
610 return dest;
611 }
612 }
613 read_unlock(&__ip_vs_rs_lock);
614
615 return NULL;
616}
617
618/*
619 * Lookup destination by {addr,port} in the given service
620 */
621static struct ip_vs_dest *
7937df15
JV
622ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
623 __be16 dport)
1da177e4
LT
624{
625 struct ip_vs_dest *dest;
626
627 /*
628 * Find the destination for the given service
629 */
630 list_for_each_entry(dest, &svc->destinations, n_list) {
7937df15
JV
631 if ((dest->af == svc->af)
632 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
633 && (dest->port == dport)) {
1da177e4
LT
634 /* HIT */
635 return dest;
636 }
637 }
638
639 return NULL;
640}
641
1e356f9c
RB
642/*
643 * Find destination by {daddr,dport,vaddr,protocol}
644 * Cretaed to be used in ip_vs_process_message() in
645 * the backup synchronization daemon. It finds the
646 * destination to be bound to the received connection
647 * on the backup.
648 *
649 * ip_vs_lookup_real_service() looked promissing, but
650 * seems not working as expected.
651 */
7937df15
JV
652struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
653 __be16 dport,
654 const union nf_inet_addr *vaddr,
655 __be16 vport, __u16 protocol)
1e356f9c
RB
656{
657 struct ip_vs_dest *dest;
658 struct ip_vs_service *svc;
659
7937df15 660 svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
1e356f9c
RB
661 if (!svc)
662 return NULL;
663 dest = ip_vs_lookup_dest(svc, daddr, dport);
664 if (dest)
665 atomic_inc(&dest->refcnt);
666 ip_vs_service_put(svc);
667 return dest;
668}
1da177e4
LT
669
670/*
671 * Lookup dest by {svc,addr,port} in the destination trash.
672 * The destination trash is used to hold the destinations that are removed
673 * from the service table but are still referenced by some conn entries.
674 * The reason to add the destination trash is when the dest is temporary
675 * down (either by administrator or by monitor program), the dest can be
676 * picked back from the trash, the remaining connections to the dest can
677 * continue, and the counting information of the dest is also useful for
678 * scheduling.
679 */
680static struct ip_vs_dest *
7937df15
JV
681ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
682 __be16 dport)
1da177e4
LT
683{
684 struct ip_vs_dest *dest, *nxt;
685
686 /*
687 * Find the destination in trash
688 */
689 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
7937df15
JV
690 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
691 "dest->refcnt=%d\n",
692 dest->vfwmark,
693 IP_VS_DBG_ADDR(svc->af, &dest->addr),
694 ntohs(dest->port),
695 atomic_read(&dest->refcnt));
696 if (dest->af == svc->af &&
697 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
1da177e4
LT
698 dest->port == dport &&
699 dest->vfwmark == svc->fwmark &&
700 dest->protocol == svc->protocol &&
701 (svc->fwmark ||
7937df15 702 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
1da177e4
LT
703 dest->vport == svc->port))) {
704 /* HIT */
705 return dest;
706 }
707
708 /*
709 * Try to purge the destination from trash if not referenced
710 */
711 if (atomic_read(&dest->refcnt) == 1) {
7937df15
JV
712 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
713 "from trash\n",
714 dest->vfwmark,
715 IP_VS_DBG_ADDR(svc->af, &dest->addr),
716 ntohs(dest->port));
1da177e4
LT
717 list_del(&dest->n_list);
718 ip_vs_dst_reset(dest);
719 __ip_vs_unbind_svc(dest);
720 kfree(dest);
721 }
722 }
723
724 return NULL;
725}
726
727
728/*
729 * Clean up all the destinations in the trash
730 * Called by the ip_vs_control_cleanup()
731 *
732 * When the ip_vs_control_clearup is activated by ipvs module exit,
733 * the service tables must have been flushed and all the connections
734 * are expired, and the refcnt of each destination in the trash must
735 * be 1, so we simply release them here.
736 */
737static void ip_vs_trash_cleanup(void)
738{
739 struct ip_vs_dest *dest, *nxt;
740
741 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
742 list_del(&dest->n_list);
743 ip_vs_dst_reset(dest);
744 __ip_vs_unbind_svc(dest);
745 kfree(dest);
746 }
747}
748
749
750static void
751ip_vs_zero_stats(struct ip_vs_stats *stats)
752{
753 spin_lock_bh(&stats->lock);
e93615d0 754
e9c0ce23 755 memset(&stats->ustats, 0, sizeof(stats->ustats));
1da177e4 756 ip_vs_zero_estimator(stats);
e93615d0 757
3a14a313 758 spin_unlock_bh(&stats->lock);
1da177e4
LT
759}
760
761/*
762 * Update a destination in the given service
763 */
764static void
765__ip_vs_update_dest(struct ip_vs_service *svc,
c860c6b1 766 struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
767{
768 int conn_flags;
769
770 /* set the weight and the flags */
771 atomic_set(&dest->weight, udest->weight);
3575792e
JA
772 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
773 conn_flags |= IP_VS_CONN_F_INACTIVE;
1da177e4
LT
774
775 /* check if local node and update the flags */
09571c7a
VB
776#ifdef CONFIG_IP_VS_IPV6
777 if (svc->af == AF_INET6) {
778 if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) {
779 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
780 | IP_VS_CONN_F_LOCALNODE;
781 }
782 } else
783#endif
784 if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
785 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
786 | IP_VS_CONN_F_LOCALNODE;
787 }
1da177e4
LT
788
789 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
3575792e 790 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
1da177e4
LT
791 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
792 } else {
793 /*
794 * Put the real service in ip_vs_rtable if not present.
795 * For now only for NAT!
796 */
797 write_lock_bh(&__ip_vs_rs_lock);
798 ip_vs_rs_hash(dest);
799 write_unlock_bh(&__ip_vs_rs_lock);
800 }
801 atomic_set(&dest->conn_flags, conn_flags);
802
803 /* bind the service */
804 if (!dest->svc) {
805 __ip_vs_bind_svc(dest, svc);
806 } else {
807 if (dest->svc != svc) {
808 __ip_vs_unbind_svc(dest);
809 ip_vs_zero_stats(&dest->stats);
810 __ip_vs_bind_svc(dest, svc);
811 }
812 }
813
814 /* set the dest status flags */
815 dest->flags |= IP_VS_DEST_F_AVAILABLE;
816
817 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
818 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
819 dest->u_threshold = udest->u_threshold;
820 dest->l_threshold = udest->l_threshold;
821}
822
823
824/*
825 * Create a destination for the given service
826 */
827static int
c860c6b1 828ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
1da177e4
LT
829 struct ip_vs_dest **dest_p)
830{
831 struct ip_vs_dest *dest;
832 unsigned atype;
833
834 EnterFunction(2);
835
09571c7a
VB
836#ifdef CONFIG_IP_VS_IPV6
837 if (svc->af == AF_INET6) {
838 atype = ipv6_addr_type(&udest->addr.in6);
3bfb92f4
SW
839 if ((!(atype & IPV6_ADDR_UNICAST) ||
840 atype & IPV6_ADDR_LINKLOCAL) &&
09571c7a
VB
841 !__ip_vs_addr_is_local_v6(&udest->addr.in6))
842 return -EINVAL;
843 } else
844#endif
845 {
846 atype = inet_addr_type(&init_net, udest->addr.ip);
847 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
848 return -EINVAL;
849 }
1da177e4 850
dee06e47 851 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
1da177e4 852 if (dest == NULL) {
1e3e238e 853 pr_err("%s(): no memory.\n", __func__);
1da177e4
LT
854 return -ENOMEM;
855 }
1da177e4 856
c860c6b1 857 dest->af = svc->af;
1da177e4 858 dest->protocol = svc->protocol;
c860c6b1 859 dest->vaddr = svc->addr;
1da177e4
LT
860 dest->vport = svc->port;
861 dest->vfwmark = svc->fwmark;
c860c6b1 862 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
1da177e4
LT
863 dest->port = udest->port;
864
865 atomic_set(&dest->activeconns, 0);
866 atomic_set(&dest->inactconns, 0);
867 atomic_set(&dest->persistconns, 0);
868 atomic_set(&dest->refcnt, 0);
869
870 INIT_LIST_HEAD(&dest->d_list);
871 spin_lock_init(&dest->dst_lock);
872 spin_lock_init(&dest->stats.lock);
873 __ip_vs_update_dest(svc, dest, udest);
874 ip_vs_new_estimator(&dest->stats);
875
876 *dest_p = dest;
877
878 LeaveFunction(2);
879 return 0;
880}
881
882
883/*
884 * Add a destination into an existing service
885 */
886static int
c860c6b1 887ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
888{
889 struct ip_vs_dest *dest;
c860c6b1 890 union nf_inet_addr daddr;
014d730d 891 __be16 dport = udest->port;
1da177e4
LT
892 int ret;
893
894 EnterFunction(2);
895
896 if (udest->weight < 0) {
1e3e238e 897 pr_err("%s(): server weight less than zero\n", __func__);
1da177e4
LT
898 return -ERANGE;
899 }
900
901 if (udest->l_threshold > udest->u_threshold) {
1e3e238e
HE
902 pr_err("%s(): lower threshold is higher than upper threshold\n",
903 __func__);
1da177e4
LT
904 return -ERANGE;
905 }
906
c860c6b1
JV
907 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
908
1da177e4
LT
909 /*
910 * Check if the dest already exists in the list
911 */
7937df15
JV
912 dest = ip_vs_lookup_dest(svc, &daddr, dport);
913
1da177e4 914 if (dest != NULL) {
1e3e238e 915 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
1da177e4
LT
916 return -EEXIST;
917 }
918
919 /*
920 * Check if the dest already exists in the trash and
921 * is from the same service
922 */
7937df15
JV
923 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
924
1da177e4 925 if (dest != NULL) {
cfc78c5a
JV
926 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
927 "dest->refcnt=%d, service %u/%s:%u\n",
928 IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
929 atomic_read(&dest->refcnt),
930 dest->vfwmark,
931 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
932 ntohs(dest->vport));
933
1da177e4
LT
934 __ip_vs_update_dest(svc, dest, udest);
935
936 /*
937 * Get the destination from the trash
938 */
939 list_del(&dest->n_list);
940
941 ip_vs_new_estimator(&dest->stats);
942
943 write_lock_bh(&__ip_vs_svc_lock);
944
945 /*
946 * Wait until all other svc users go away.
947 */
948 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
949
950 list_add(&dest->n_list, &svc->destinations);
951 svc->num_dests++;
952
953 /* call the update_service function of its scheduler */
82dfb6f3
SW
954 if (svc->scheduler->update_service)
955 svc->scheduler->update_service(svc);
1da177e4
LT
956
957 write_unlock_bh(&__ip_vs_svc_lock);
958 return 0;
959 }
960
961 /*
962 * Allocate and initialize the dest structure
963 */
964 ret = ip_vs_new_dest(svc, udest, &dest);
965 if (ret) {
966 return ret;
967 }
968
969 /*
970 * Add the dest entry into the list
971 */
972 atomic_inc(&dest->refcnt);
973
974 write_lock_bh(&__ip_vs_svc_lock);
975
976 /*
977 * Wait until all other svc users go away.
978 */
979 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
980
981 list_add(&dest->n_list, &svc->destinations);
982 svc->num_dests++;
983
984 /* call the update_service function of its scheduler */
82dfb6f3
SW
985 if (svc->scheduler->update_service)
986 svc->scheduler->update_service(svc);
1da177e4
LT
987
988 write_unlock_bh(&__ip_vs_svc_lock);
989
990 LeaveFunction(2);
991
992 return 0;
993}
994
995
996/*
997 * Edit a destination in the given service
998 */
999static int
c860c6b1 1000ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
1001{
1002 struct ip_vs_dest *dest;
c860c6b1 1003 union nf_inet_addr daddr;
014d730d 1004 __be16 dport = udest->port;
1da177e4
LT
1005
1006 EnterFunction(2);
1007
1008 if (udest->weight < 0) {
1e3e238e 1009 pr_err("%s(): server weight less than zero\n", __func__);
1da177e4
LT
1010 return -ERANGE;
1011 }
1012
1013 if (udest->l_threshold > udest->u_threshold) {
1e3e238e
HE
1014 pr_err("%s(): lower threshold is higher than upper threshold\n",
1015 __func__);
1da177e4
LT
1016 return -ERANGE;
1017 }
1018
c860c6b1
JV
1019 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
1020
1da177e4
LT
1021 /*
1022 * Lookup the destination list
1023 */
7937df15
JV
1024 dest = ip_vs_lookup_dest(svc, &daddr, dport);
1025
1da177e4 1026 if (dest == NULL) {
1e3e238e 1027 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
1da177e4
LT
1028 return -ENOENT;
1029 }
1030
1031 __ip_vs_update_dest(svc, dest, udest);
1032
1033 write_lock_bh(&__ip_vs_svc_lock);
1034
1035 /* Wait until all other svc users go away */
cae7ca3d 1036 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1da177e4
LT
1037
1038 /* call the update_service, because server weight may be changed */
82dfb6f3
SW
1039 if (svc->scheduler->update_service)
1040 svc->scheduler->update_service(svc);
1da177e4
LT
1041
1042 write_unlock_bh(&__ip_vs_svc_lock);
1043
1044 LeaveFunction(2);
1045
1046 return 0;
1047}
1048
1049
1050/*
1051 * Delete a destination (must be already unlinked from the service)
1052 */
1053static void __ip_vs_del_dest(struct ip_vs_dest *dest)
1054{
1055 ip_vs_kill_estimator(&dest->stats);
1056
1057 /*
1058 * Remove it from the d-linked list with the real services.
1059 */
1060 write_lock_bh(&__ip_vs_rs_lock);
1061 ip_vs_rs_unhash(dest);
1062 write_unlock_bh(&__ip_vs_rs_lock);
1063
1064 /*
1065 * Decrease the refcnt of the dest, and free the dest
1066 * if nobody refers to it (refcnt=0). Otherwise, throw
1067 * the destination into the trash.
1068 */
1069 if (atomic_dec_and_test(&dest->refcnt)) {
1070 ip_vs_dst_reset(dest);
1071 /* simply decrease svc->refcnt here, let the caller check
1072 and release the service if nobody refers to it.
1073 Only user context can release destination and service,
1074 and only one user context can update virtual service at a
1075 time, so the operation here is OK */
1076 atomic_dec(&dest->svc->refcnt);
1077 kfree(dest);
1078 } else {
cfc78c5a
JV
1079 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1080 "dest->refcnt=%d\n",
1081 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1082 ntohs(dest->port),
1083 atomic_read(&dest->refcnt));
1da177e4
LT
1084 list_add(&dest->n_list, &ip_vs_dest_trash);
1085 atomic_inc(&dest->refcnt);
1086 }
1087}
1088
1089
1090/*
1091 * Unlink a destination from the given service
1092 */
1093static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1094 struct ip_vs_dest *dest,
1095 int svcupd)
1096{
1097 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1098
1099 /*
1100 * Remove it from the d-linked destination list.
1101 */
1102 list_del(&dest->n_list);
1103 svc->num_dests--;
82dfb6f3
SW
1104
1105 /*
1106 * Call the update_service function of its scheduler
1107 */
1108 if (svcupd && svc->scheduler->update_service)
1109 svc->scheduler->update_service(svc);
1da177e4
LT
1110}
1111
1112
1113/*
1114 * Delete a destination server in the given service
1115 */
1116static int
c860c6b1 1117ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
1118{
1119 struct ip_vs_dest *dest;
014d730d 1120 __be16 dport = udest->port;
1da177e4
LT
1121
1122 EnterFunction(2);
1123
7937df15 1124 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
c860c6b1 1125
1da177e4 1126 if (dest == NULL) {
1e3e238e 1127 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1da177e4
LT
1128 return -ENOENT;
1129 }
1130
1131 write_lock_bh(&__ip_vs_svc_lock);
1132
1133 /*
1134 * Wait until all other svc users go away.
1135 */
1136 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1137
1138 /*
1139 * Unlink dest from the service
1140 */
1141 __ip_vs_unlink_dest(svc, dest, 1);
1142
1143 write_unlock_bh(&__ip_vs_svc_lock);
1144
1145 /*
1146 * Delete the destination
1147 */
1148 __ip_vs_del_dest(dest);
1149
1150 LeaveFunction(2);
1151
1152 return 0;
1153}
1154
1155
1156/*
1157 * Add a service into the service hash table
1158 */
1159static int
c860c6b1
JV
1160ip_vs_add_service(struct ip_vs_service_user_kern *u,
1161 struct ip_vs_service **svc_p)
1da177e4
LT
1162{
1163 int ret = 0;
1164 struct ip_vs_scheduler *sched = NULL;
1165 struct ip_vs_service *svc = NULL;
1166
1167 /* increase the module use count */
1168 ip_vs_use_count_inc();
1169
1170 /* Lookup the scheduler by 'u->sched_name' */
1171 sched = ip_vs_scheduler_get(u->sched_name);
1172 if (sched == NULL) {
1e3e238e 1173 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1da177e4
LT
1174 ret = -ENOENT;
1175 goto out_mod_dec;
1176 }
1177
f94fd041 1178#ifdef CONFIG_IP_VS_IPV6
48148938
JV
1179 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1180 ret = -EINVAL;
1181 goto out_err;
f94fd041
JV
1182 }
1183#endif
1184
dee06e47 1185 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1da177e4 1186 if (svc == NULL) {
1e3e238e 1187 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1da177e4
LT
1188 ret = -ENOMEM;
1189 goto out_err;
1190 }
1da177e4
LT
1191
1192 /* I'm the first user of the service */
1193 atomic_set(&svc->usecnt, 1);
1194 atomic_set(&svc->refcnt, 0);
1195
c860c6b1 1196 svc->af = u->af;
1da177e4 1197 svc->protocol = u->protocol;
c860c6b1 1198 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1da177e4
LT
1199 svc->port = u->port;
1200 svc->fwmark = u->fwmark;
1201 svc->flags = u->flags;
1202 svc->timeout = u->timeout * HZ;
1203 svc->netmask = u->netmask;
1204
1205 INIT_LIST_HEAD(&svc->destinations);
1206 rwlock_init(&svc->sched_lock);
1207 spin_lock_init(&svc->stats.lock);
1208
1209 /* Bind the scheduler */
1210 ret = ip_vs_bind_scheduler(svc, sched);
1211 if (ret)
1212 goto out_err;
1213 sched = NULL;
1214
1215 /* Update the virtual service counters */
1216 if (svc->port == FTPPORT)
1217 atomic_inc(&ip_vs_ftpsvc_counter);
1218 else if (svc->port == 0)
1219 atomic_inc(&ip_vs_nullsvc_counter);
1220
1221 ip_vs_new_estimator(&svc->stats);
f94fd041
JV
1222
1223 /* Count only IPv4 services for old get/setsockopt interface */
1224 if (svc->af == AF_INET)
1225 ip_vs_num_services++;
1da177e4
LT
1226
1227 /* Hash the service into the service table */
1228 write_lock_bh(&__ip_vs_svc_lock);
1229 ip_vs_svc_hash(svc);
1230 write_unlock_bh(&__ip_vs_svc_lock);
1231
1232 *svc_p = svc;
1233 return 0;
1234
1235 out_err:
1236 if (svc != NULL) {
1237 if (svc->scheduler)
1238 ip_vs_unbind_scheduler(svc);
1239 if (svc->inc) {
1240 local_bh_disable();
1241 ip_vs_app_inc_put(svc->inc);
1242 local_bh_enable();
1243 }
1244 kfree(svc);
1245 }
1246 ip_vs_scheduler_put(sched);
1247
1248 out_mod_dec:
1249 /* decrease the module use count */
1250 ip_vs_use_count_dec();
1251
1252 return ret;
1253}
1254
1255
1256/*
1257 * Edit a service and bind it with a new scheduler
1258 */
1259static int
c860c6b1 1260ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1da177e4
LT
1261{
1262 struct ip_vs_scheduler *sched, *old_sched;
1263 int ret = 0;
1264
1265 /*
1266 * Lookup the scheduler, by 'u->sched_name'
1267 */
1268 sched = ip_vs_scheduler_get(u->sched_name);
1269 if (sched == NULL) {
1e3e238e 1270 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1da177e4
LT
1271 return -ENOENT;
1272 }
1273 old_sched = sched;
1274
f94fd041 1275#ifdef CONFIG_IP_VS_IPV6
48148938
JV
1276 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1277 ret = -EINVAL;
1278 goto out;
f94fd041
JV
1279 }
1280#endif
1281
1da177e4
LT
1282 write_lock_bh(&__ip_vs_svc_lock);
1283
1284 /*
1285 * Wait until all other svc users go away.
1286 */
1287 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1288
1289 /*
1290 * Set the flags and timeout value
1291 */
1292 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1293 svc->timeout = u->timeout * HZ;
1294 svc->netmask = u->netmask;
1295
1296 old_sched = svc->scheduler;
1297 if (sched != old_sched) {
1298 /*
1299 * Unbind the old scheduler
1300 */
1301 if ((ret = ip_vs_unbind_scheduler(svc))) {
1302 old_sched = sched;
9e691ed6 1303 goto out_unlock;
1da177e4
LT
1304 }
1305
1306 /*
1307 * Bind the new scheduler
1308 */
1309 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1310 /*
1311 * If ip_vs_bind_scheduler fails, restore the old
1312 * scheduler.
1313 * The main reason of failure is out of memory.
1314 *
1315 * The question is if the old scheduler can be
1316 * restored all the time. TODO: if it cannot be
1317 * restored some time, we must delete the service,
1318 * otherwise the system may crash.
1319 */
1320 ip_vs_bind_scheduler(svc, old_sched);
1321 old_sched = sched;
9e691ed6 1322 goto out_unlock;
1da177e4
LT
1323 }
1324 }
1325
9e691ed6 1326 out_unlock:
1da177e4 1327 write_unlock_bh(&__ip_vs_svc_lock);
8d5803bf 1328#ifdef CONFIG_IP_VS_IPV6
9e691ed6 1329 out:
8d5803bf 1330#endif
1da177e4
LT
1331
1332 if (old_sched)
1333 ip_vs_scheduler_put(old_sched);
1334
1335 return ret;
1336}
1337
1338
1339/*
1340 * Delete a service from the service list
1341 * - The service must be unlinked, unlocked and not referenced!
1342 * - We are called under _bh lock
1343 */
1344static void __ip_vs_del_service(struct ip_vs_service *svc)
1345{
1346 struct ip_vs_dest *dest, *nxt;
1347 struct ip_vs_scheduler *old_sched;
1348
f94fd041
JV
1349 /* Count only IPv4 services for old get/setsockopt interface */
1350 if (svc->af == AF_INET)
1351 ip_vs_num_services--;
1352
1da177e4
LT
1353 ip_vs_kill_estimator(&svc->stats);
1354
1355 /* Unbind scheduler */
1356 old_sched = svc->scheduler;
1357 ip_vs_unbind_scheduler(svc);
1358 if (old_sched)
1359 ip_vs_scheduler_put(old_sched);
1360
1361 /* Unbind app inc */
1362 if (svc->inc) {
1363 ip_vs_app_inc_put(svc->inc);
1364 svc->inc = NULL;
1365 }
1366
1367 /*
1368 * Unlink the whole destination list
1369 */
1370 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1371 __ip_vs_unlink_dest(svc, dest, 0);
1372 __ip_vs_del_dest(dest);
1373 }
1374
1375 /*
1376 * Update the virtual service counters
1377 */
1378 if (svc->port == FTPPORT)
1379 atomic_dec(&ip_vs_ftpsvc_counter);
1380 else if (svc->port == 0)
1381 atomic_dec(&ip_vs_nullsvc_counter);
1382
1383 /*
1384 * Free the service if nobody refers to it
1385 */
1386 if (atomic_read(&svc->refcnt) == 0)
1387 kfree(svc);
1388
1389 /* decrease the module use count */
1390 ip_vs_use_count_dec();
1391}
1392
1393/*
1394 * Delete a service from the service list
1395 */
1396static int ip_vs_del_service(struct ip_vs_service *svc)
1397{
1398 if (svc == NULL)
1399 return -EEXIST;
1400
1401 /*
1402 * Unhash it from the service table
1403 */
1404 write_lock_bh(&__ip_vs_svc_lock);
1405
1406 ip_vs_svc_unhash(svc);
1407
1408 /*
1409 * Wait until all the svc users go away.
1410 */
1411 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1412
1413 __ip_vs_del_service(svc);
1414
1415 write_unlock_bh(&__ip_vs_svc_lock);
1416
1417 return 0;
1418}
1419
1420
1421/*
1422 * Flush all the virtual services
1423 */
1424static int ip_vs_flush(void)
1425{
1426 int idx;
1427 struct ip_vs_service *svc, *nxt;
1428
1429 /*
1430 * Flush the service table hashed by <protocol,addr,port>
1431 */
1432 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1433 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1434 write_lock_bh(&__ip_vs_svc_lock);
1435 ip_vs_svc_unhash(svc);
1436 /*
1437 * Wait until all the svc users go away.
1438 */
1439 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1440 __ip_vs_del_service(svc);
1441 write_unlock_bh(&__ip_vs_svc_lock);
1442 }
1443 }
1444
1445 /*
1446 * Flush the service table hashed by fwmark
1447 */
1448 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1449 list_for_each_entry_safe(svc, nxt,
1450 &ip_vs_svc_fwm_table[idx], f_list) {
1451 write_lock_bh(&__ip_vs_svc_lock);
1452 ip_vs_svc_unhash(svc);
1453 /*
1454 * Wait until all the svc users go away.
1455 */
1456 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1457 __ip_vs_del_service(svc);
1458 write_unlock_bh(&__ip_vs_svc_lock);
1459 }
1460 }
1461
1462 return 0;
1463}
1464
1465
1466/*
1467 * Zero counters in a service or all services
1468 */
1469static int ip_vs_zero_service(struct ip_vs_service *svc)
1470{
1471 struct ip_vs_dest *dest;
1472
1473 write_lock_bh(&__ip_vs_svc_lock);
1474 list_for_each_entry(dest, &svc->destinations, n_list) {
1475 ip_vs_zero_stats(&dest->stats);
1476 }
1477 ip_vs_zero_stats(&svc->stats);
1478 write_unlock_bh(&__ip_vs_svc_lock);
1479 return 0;
1480}
1481
1482static int ip_vs_zero_all(void)
1483{
1484 int idx;
1485 struct ip_vs_service *svc;
1486
1487 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1488 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1489 ip_vs_zero_service(svc);
1490 }
1491 }
1492
1493 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1494 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1495 ip_vs_zero_service(svc);
1496 }
1497 }
1498
1499 ip_vs_zero_stats(&ip_vs_stats);
1500 return 0;
1501}
1502
1503
1504static int
8d65af78 1505proc_do_defense_mode(ctl_table *table, int write,
1da177e4
LT
1506 void __user *buffer, size_t *lenp, loff_t *ppos)
1507{
1508 int *valp = table->data;
1509 int val = *valp;
1510 int rc;
1511
8d65af78 1512 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1da177e4
LT
1513 if (write && (*valp != val)) {
1514 if ((*valp < 0) || (*valp > 3)) {
1515 /* Restore the correct value */
1516 *valp = val;
1517 } else {
1da177e4 1518 update_defense_level();
1da177e4
LT
1519 }
1520 }
1521 return rc;
1522}
1523
1524
1525static int
8d65af78 1526proc_do_sync_threshold(ctl_table *table, int write,
1da177e4
LT
1527 void __user *buffer, size_t *lenp, loff_t *ppos)
1528{
1529 int *valp = table->data;
1530 int val[2];
1531 int rc;
1532
1533 /* backup the value first */
1534 memcpy(val, valp, sizeof(val));
1535
8d65af78 1536 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1da177e4
LT
1537 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1538 /* Restore the correct value */
1539 memcpy(valp, val, sizeof(val));
1540 }
1541 return rc;
1542}
1543
1544
1545/*
1546 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1547 */
1548
1549static struct ctl_table vs_vars[] = {
1550 {
1da177e4
LT
1551 .procname = "amemthresh",
1552 .data = &sysctl_ip_vs_amemthresh,
1553 .maxlen = sizeof(int),
1554 .mode = 0644,
6d9f239a 1555 .proc_handler = proc_dointvec,
1da177e4
LT
1556 },
1557#ifdef CONFIG_IP_VS_DEBUG
1558 {
1da177e4
LT
1559 .procname = "debug_level",
1560 .data = &sysctl_ip_vs_debug_level,
1561 .maxlen = sizeof(int),
1562 .mode = 0644,
6d9f239a 1563 .proc_handler = proc_dointvec,
1da177e4
LT
1564 },
1565#endif
1566 {
1da177e4
LT
1567 .procname = "am_droprate",
1568 .data = &sysctl_ip_vs_am_droprate,
1569 .maxlen = sizeof(int),
1570 .mode = 0644,
6d9f239a 1571 .proc_handler = proc_dointvec,
1da177e4
LT
1572 },
1573 {
1da177e4
LT
1574 .procname = "drop_entry",
1575 .data = &sysctl_ip_vs_drop_entry,
1576 .maxlen = sizeof(int),
1577 .mode = 0644,
6d9f239a 1578 .proc_handler = proc_do_defense_mode,
1da177e4
LT
1579 },
1580 {
1da177e4
LT
1581 .procname = "drop_packet",
1582 .data = &sysctl_ip_vs_drop_packet,
1583 .maxlen = sizeof(int),
1584 .mode = 0644,
6d9f239a 1585 .proc_handler = proc_do_defense_mode,
1da177e4 1586 },
f4bc17cd
JA
1587#ifdef CONFIG_IP_VS_NFCT
1588 {
1589 .procname = "conntrack",
1590 .data = &sysctl_ip_vs_conntrack,
1591 .maxlen = sizeof(int),
1592 .mode = 0644,
1593 .proc_handler = &proc_dointvec,
1594 },
1595#endif
1da177e4 1596 {
1da177e4
LT
1597 .procname = "secure_tcp",
1598 .data = &sysctl_ip_vs_secure_tcp,
1599 .maxlen = sizeof(int),
1600 .mode = 0644,
6d9f239a 1601 .proc_handler = proc_do_defense_mode,
1da177e4 1602 },
8a803040
JA
1603 {
1604 .procname = "snat_reroute",
1605 .data = &sysctl_ip_vs_snat_reroute,
1606 .maxlen = sizeof(int),
1607 .mode = 0644,
1608 .proc_handler = &proc_dointvec,
1609 },
1da177e4
LT
1610#if 0
1611 {
1da177e4
LT
1612 .procname = "timeout_established",
1613 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1614 .maxlen = sizeof(int),
1615 .mode = 0644,
6d9f239a 1616 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1617 },
1618 {
1da177e4
LT
1619 .procname = "timeout_synsent",
1620 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1621 .maxlen = sizeof(int),
1622 .mode = 0644,
6d9f239a 1623 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1624 },
1625 {
1da177e4
LT
1626 .procname = "timeout_synrecv",
1627 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1628 .maxlen = sizeof(int),
1629 .mode = 0644,
6d9f239a 1630 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1631 },
1632 {
1da177e4
LT
1633 .procname = "timeout_finwait",
1634 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1635 .maxlen = sizeof(int),
1636 .mode = 0644,
6d9f239a 1637 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1638 },
1639 {
1da177e4
LT
1640 .procname = "timeout_timewait",
1641 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1642 .maxlen = sizeof(int),
1643 .mode = 0644,
6d9f239a 1644 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1645 },
1646 {
1da177e4
LT
1647 .procname = "timeout_close",
1648 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1649 .maxlen = sizeof(int),
1650 .mode = 0644,
6d9f239a 1651 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1652 },
1653 {
1da177e4
LT
1654 .procname = "timeout_closewait",
1655 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1656 .maxlen = sizeof(int),
1657 .mode = 0644,
6d9f239a 1658 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1659 },
1660 {
1da177e4
LT
1661 .procname = "timeout_lastack",
1662 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1663 .maxlen = sizeof(int),
1664 .mode = 0644,
6d9f239a 1665 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1666 },
1667 {
1da177e4
LT
1668 .procname = "timeout_listen",
1669 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1670 .maxlen = sizeof(int),
1671 .mode = 0644,
6d9f239a 1672 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1673 },
1674 {
1da177e4
LT
1675 .procname = "timeout_synack",
1676 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1677 .maxlen = sizeof(int),
1678 .mode = 0644,
6d9f239a 1679 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1680 },
1681 {
1da177e4
LT
1682 .procname = "timeout_udp",
1683 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1684 .maxlen = sizeof(int),
1685 .mode = 0644,
6d9f239a 1686 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1687 },
1688 {
1da177e4
LT
1689 .procname = "timeout_icmp",
1690 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1691 .maxlen = sizeof(int),
1692 .mode = 0644,
6d9f239a 1693 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1694 },
1695#endif
1696 {
1da177e4
LT
1697 .procname = "cache_bypass",
1698 .data = &sysctl_ip_vs_cache_bypass,
1699 .maxlen = sizeof(int),
1700 .mode = 0644,
6d9f239a 1701 .proc_handler = proc_dointvec,
1da177e4
LT
1702 },
1703 {
1da177e4
LT
1704 .procname = "expire_nodest_conn",
1705 .data = &sysctl_ip_vs_expire_nodest_conn,
1706 .maxlen = sizeof(int),
1707 .mode = 0644,
6d9f239a 1708 .proc_handler = proc_dointvec,
1da177e4
LT
1709 },
1710 {
1da177e4
LT
1711 .procname = "expire_quiescent_template",
1712 .data = &sysctl_ip_vs_expire_quiescent_template,
1713 .maxlen = sizeof(int),
1714 .mode = 0644,
6d9f239a 1715 .proc_handler = proc_dointvec,
1da177e4
LT
1716 },
1717 {
1da177e4
LT
1718 .procname = "sync_threshold",
1719 .data = &sysctl_ip_vs_sync_threshold,
1720 .maxlen = sizeof(sysctl_ip_vs_sync_threshold),
1721 .mode = 0644,
6d9f239a 1722 .proc_handler = proc_do_sync_threshold,
1da177e4
LT
1723 },
1724 {
1da177e4
LT
1725 .procname = "nat_icmp_send",
1726 .data = &sysctl_ip_vs_nat_icmp_send,
1727 .maxlen = sizeof(int),
1728 .mode = 0644,
6d9f239a 1729 .proc_handler = proc_dointvec,
1da177e4 1730 },
f8572d8f 1731 { }
1da177e4
LT
1732};
1733
5587da55 1734const struct ctl_path net_vs_ctl_path[] = {
f8572d8f
EB
1735 { .procname = "net", },
1736 { .procname = "ipv4", },
90754f8e
PE
1737 { .procname = "vs", },
1738 { }
1da177e4 1739};
90754f8e 1740EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1da177e4
LT
1741
1742static struct ctl_table_header * sysctl_header;
1743
1744#ifdef CONFIG_PROC_FS
1745
/* Per-open iteration state of the service listing seq_file */
struct ip_vs_iter {
	struct list_head *table;	/* ip_vs_svc_table or ip_vs_svc_fwm_table */
	int bucket;			/* index of the current hash bucket */
};
1750
1751/*
1752 * Write the contents of the VS rule table to a PROCfs file.
1753 * (It is kept just for backward compatibility)
1754 */
1755static inline const char *ip_vs_fwd_name(unsigned flags)
1756{
1757 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1758 case IP_VS_CONN_F_LOCALNODE:
1759 return "Local";
1760 case IP_VS_CONN_F_TUNNEL:
1761 return "Tunnel";
1762 case IP_VS_CONN_F_DROUTE:
1763 return "Route";
1764 default:
1765 return "Masq";
1766 }
1767}
1768
1769
1770/* Get the Nth entry in the two lists */
1771static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1772{
1773 struct ip_vs_iter *iter = seq->private;
1774 int idx;
1775 struct ip_vs_service *svc;
1776
1777 /* look in hash by protocol */
1778 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1779 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1780 if (pos-- == 0){
1781 iter->table = ip_vs_svc_table;
1782 iter->bucket = idx;
1783 return svc;
1784 }
1785 }
1786 }
1787
1788 /* keep looking in fwmark */
1789 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1790 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1791 if (pos-- == 0) {
1792 iter->table = ip_vs_svc_fwm_table;
1793 iter->bucket = idx;
1794 return svc;
1795 }
1796 }
1797 }
1798
1799 return NULL;
1800}
1801
1802static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
563e94f0 1803__acquires(__ip_vs_svc_lock)
1da177e4
LT
1804{
1805
1806 read_lock_bh(&__ip_vs_svc_lock);
1807 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1808}
1809
1810
1811static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1812{
1813 struct list_head *e;
1814 struct ip_vs_iter *iter;
1815 struct ip_vs_service *svc;
1816
1817 ++*pos;
1818 if (v == SEQ_START_TOKEN)
1819 return ip_vs_info_array(seq,0);
1820
1821 svc = v;
1822 iter = seq->private;
1823
1824 if (iter->table == ip_vs_svc_table) {
1825 /* next service in table hashed by protocol */
1826 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1827 return list_entry(e, struct ip_vs_service, s_list);
1828
1829
1830 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1831 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1832 s_list) {
1833 return svc;
1834 }
1835 }
1836
1837 iter->table = ip_vs_svc_fwm_table;
1838 iter->bucket = -1;
1839 goto scan_fwmark;
1840 }
1841
1842 /* next service in hashed by fwmark */
1843 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1844 return list_entry(e, struct ip_vs_service, f_list);
1845
1846 scan_fwmark:
1847 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1848 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1849 f_list)
1850 return svc;
1851 }
1852
1853 return NULL;
1854}
1855
1856static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
563e94f0 1857__releases(__ip_vs_svc_lock)
1da177e4
LT
1858{
1859 read_unlock_bh(&__ip_vs_svc_lock);
1860}
1861
1862
1863static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1864{
1865 if (v == SEQ_START_TOKEN) {
1866 seq_printf(seq,
1867 "IP Virtual Server version %d.%d.%d (size=%d)\n",
6f7edb48 1868 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1da177e4
LT
1869 seq_puts(seq,
1870 "Prot LocalAddress:Port Scheduler Flags\n");
1871 seq_puts(seq,
1872 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1873 } else {
1874 const struct ip_vs_service *svc = v;
1875 const struct ip_vs_iter *iter = seq->private;
1876 const struct ip_vs_dest *dest;
1877
667a5f18
VB
1878 if (iter->table == ip_vs_svc_table) {
1879#ifdef CONFIG_IP_VS_IPV6
1880 if (svc->af == AF_INET6)
5b095d98 1881 seq_printf(seq, "%s [%pI6]:%04X %s ",
667a5f18 1882 ip_vs_proto_name(svc->protocol),
38ff4fa4 1883 &svc->addr.in6,
667a5f18
VB
1884 ntohs(svc->port),
1885 svc->scheduler->name);
1886 else
1887#endif
26ec037f 1888 seq_printf(seq, "%s %08X:%04X %s %s ",
667a5f18
VB
1889 ip_vs_proto_name(svc->protocol),
1890 ntohl(svc->addr.ip),
1891 ntohs(svc->port),
26ec037f
NC
1892 svc->scheduler->name,
1893 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
667a5f18 1894 } else {
26ec037f
NC
1895 seq_printf(seq, "FWM %08X %s %s",
1896 svc->fwmark, svc->scheduler->name,
1897 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
667a5f18 1898 }
1da177e4
LT
1899
1900 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1901 seq_printf(seq, "persistent %d %08X\n",
1902 svc->timeout,
1903 ntohl(svc->netmask));
1904 else
1905 seq_putc(seq, '\n');
1906
1907 list_for_each_entry(dest, &svc->destinations, n_list) {
667a5f18
VB
1908#ifdef CONFIG_IP_VS_IPV6
1909 if (dest->af == AF_INET6)
1910 seq_printf(seq,
5b095d98 1911 " -> [%pI6]:%04X"
667a5f18 1912 " %-7s %-6d %-10d %-10d\n",
38ff4fa4 1913 &dest->addr.in6,
667a5f18
VB
1914 ntohs(dest->port),
1915 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1916 atomic_read(&dest->weight),
1917 atomic_read(&dest->activeconns),
1918 atomic_read(&dest->inactconns));
1919 else
1920#endif
1921 seq_printf(seq,
1922 " -> %08X:%04X "
1923 "%-7s %-6d %-10d %-10d\n",
1924 ntohl(dest->addr.ip),
1925 ntohs(dest->port),
1926 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1927 atomic_read(&dest->weight),
1928 atomic_read(&dest->activeconns),
1929 atomic_read(&dest->inactconns));
1930
1da177e4
LT
1931 }
1932 }
1933 return 0;
1934}
1935
56b3d975 1936static const struct seq_operations ip_vs_info_seq_ops = {
1da177e4
LT
1937 .start = ip_vs_info_seq_start,
1938 .next = ip_vs_info_seq_next,
1939 .stop = ip_vs_info_seq_stop,
1940 .show = ip_vs_info_seq_show,
1941};
1942
1943static int ip_vs_info_open(struct inode *inode, struct file *file)
1944{
cf7732e4
PE
1945 return seq_open_private(file, &ip_vs_info_seq_ops,
1946 sizeof(struct ip_vs_iter));
1da177e4
LT
1947}
1948
9a32144e 1949static const struct file_operations ip_vs_info_fops = {
1da177e4
LT
1950 .owner = THIS_MODULE,
1951 .open = ip_vs_info_open,
1952 .read = seq_read,
1953 .llseek = seq_lseek,
1954 .release = seq_release_private,
1955};
1956
1957#endif
1958
519e49e8
SW
1959struct ip_vs_stats ip_vs_stats = {
1960 .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
1961};
1da177e4
LT
1962
1963#ifdef CONFIG_PROC_FS
1964static int ip_vs_stats_show(struct seq_file *seq, void *v)
1965{
1966
1967/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1968 seq_puts(seq,
1969 " Total Incoming Outgoing Incoming Outgoing\n");
1970 seq_printf(seq,
1971 " Conns Packets Packets Bytes Bytes\n");
1972
1973 spin_lock_bh(&ip_vs_stats.lock);
e9c0ce23
SW
1974 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,
1975 ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,
1976 (unsigned long long) ip_vs_stats.ustats.inbytes,
1977 (unsigned long long) ip_vs_stats.ustats.outbytes);
1da177e4
LT
1978
1979/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1980 seq_puts(seq,
1981 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1982 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
e9c0ce23
SW
1983 ip_vs_stats.ustats.cps,
1984 ip_vs_stats.ustats.inpps,
1985 ip_vs_stats.ustats.outpps,
1986 ip_vs_stats.ustats.inbps,
1987 ip_vs_stats.ustats.outbps);
1da177e4
LT
1988 spin_unlock_bh(&ip_vs_stats.lock);
1989
1990 return 0;
1991}
1992
1993static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1994{
1995 return single_open(file, ip_vs_stats_show, NULL);
1996}
1997
9a32144e 1998static const struct file_operations ip_vs_stats_fops = {
1da177e4
LT
1999 .owner = THIS_MODULE,
2000 .open = ip_vs_stats_seq_open,
2001 .read = seq_read,
2002 .llseek = seq_lseek,
2003 .release = single_release,
2004};
2005
2006#endif
2007
2008/*
2009 * Set timeout values for tcp tcpfin udp in the timeout_table.
2010 */
2011static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
2012{
2013 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2014 u->tcp_timeout,
2015 u->tcp_fin_timeout,
2016 u->udp_timeout);
2017
2018#ifdef CONFIG_IP_VS_PROTO_TCP
2019 if (u->tcp_timeout) {
2020 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
2021 = u->tcp_timeout * HZ;
2022 }
2023
2024 if (u->tcp_fin_timeout) {
2025 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
2026 = u->tcp_fin_timeout * HZ;
2027 }
2028#endif
2029
2030#ifdef CONFIG_IP_VS_PROTO_UDP
2031 if (u->udp_timeout) {
2032 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
2033 = u->udp_timeout * HZ;
2034 }
2035#endif
2036 return 0;
2037}
2038
2039
/*
 * Index of a set-sockopt command in the per-command tables.  The argument
 * is parenthesized so that an expression argument expands correctly.
 */
#define SET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)

/* Exact argument sizes expected from userspace for the set commands */
#define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
				 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
/* Largest of the sizes above; used as the on-stack argument buffer size */
#define MAX_ARG_LEN		SVCDEST_ARG_LEN
2047
9b5b5cff 2048static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
1da177e4
LT
2049 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
2050 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
2051 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
2052 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
2053 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
2054 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
2055 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
2056 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
2057 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
2058 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
2059 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
2060};
2061
c860c6b1
JV
2062static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2063 struct ip_vs_service_user *usvc_compat)
2064{
2065 usvc->af = AF_INET;
2066 usvc->protocol = usvc_compat->protocol;
2067 usvc->addr.ip = usvc_compat->addr;
2068 usvc->port = usvc_compat->port;
2069 usvc->fwmark = usvc_compat->fwmark;
2070
2071 /* Deep copy of sched_name is not needed here */
2072 usvc->sched_name = usvc_compat->sched_name;
2073
2074 usvc->flags = usvc_compat->flags;
2075 usvc->timeout = usvc_compat->timeout;
2076 usvc->netmask = usvc_compat->netmask;
2077}
2078
2079static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2080 struct ip_vs_dest_user *udest_compat)
2081{
2082 udest->addr.ip = udest_compat->addr;
2083 udest->port = udest_compat->port;
2084 udest->conn_flags = udest_compat->conn_flags;
2085 udest->weight = udest_compat->weight;
2086 udest->u_threshold = udest_compat->u_threshold;
2087 udest->l_threshold = udest_compat->l_threshold;
2088}
2089
1da177e4
LT
2090static int
2091do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2092{
2093 int ret;
2094 unsigned char arg[MAX_ARG_LEN];
c860c6b1
JV
2095 struct ip_vs_service_user *usvc_compat;
2096 struct ip_vs_service_user_kern usvc;
1da177e4 2097 struct ip_vs_service *svc;
c860c6b1
JV
2098 struct ip_vs_dest_user *udest_compat;
2099 struct ip_vs_dest_user_kern udest;
1da177e4
LT
2100
2101 if (!capable(CAP_NET_ADMIN))
2102 return -EPERM;
2103
04bcef2a
AV
2104 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2105 return -EINVAL;
2106 if (len < 0 || len > MAX_ARG_LEN)
2107 return -EINVAL;
1da177e4 2108 if (len != set_arglen[SET_CMDID(cmd)]) {
1e3e238e
HE
2109 pr_err("set_ctl: len %u != %u\n",
2110 len, set_arglen[SET_CMDID(cmd)]);
1da177e4
LT
2111 return -EINVAL;
2112 }
2113
2114 if (copy_from_user(arg, user, len) != 0)
2115 return -EFAULT;
2116
2117 /* increase the module use count */
2118 ip_vs_use_count_inc();
2119
14cc3e2b 2120 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
1da177e4
LT
2121 ret = -ERESTARTSYS;
2122 goto out_dec;
2123 }
2124
2125 if (cmd == IP_VS_SO_SET_FLUSH) {
2126 /* Flush the virtual service */
2127 ret = ip_vs_flush();
2128 goto out_unlock;
2129 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2130 /* Set timeout values for (tcp tcpfin udp) */
2131 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
2132 goto out_unlock;
2133 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2134 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2135 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
2136 goto out_unlock;
2137 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2138 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2139 ret = stop_sync_thread(dm->state);
2140 goto out_unlock;
2141 }
2142
c860c6b1
JV
2143 usvc_compat = (struct ip_vs_service_user *)arg;
2144 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2145
2146 /* We only use the new structs internally, so copy userspace compat
2147 * structs to extended internal versions */
2148 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2149 ip_vs_copy_udest_compat(&udest, udest_compat);
1da177e4
LT
2150
2151 if (cmd == IP_VS_SO_SET_ZERO) {
2152 /* if no service address is set, zero counters in all */
c860c6b1 2153 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
1da177e4
LT
2154 ret = ip_vs_zero_all();
2155 goto out_unlock;
2156 }
2157 }
2158
2906f66a
VMR
2159 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2160 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2161 usvc.protocol != IPPROTO_SCTP) {
1e3e238e
HE
2162 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2163 usvc.protocol, &usvc.addr.ip,
2164 ntohs(usvc.port), usvc.sched_name);
1da177e4
LT
2165 ret = -EFAULT;
2166 goto out_unlock;
2167 }
2168
2169 /* Lookup the exact service by <protocol, addr, port> or fwmark */
c860c6b1 2170 if (usvc.fwmark == 0)
b18610de
JV
2171 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
2172 &usvc.addr, usvc.port);
1da177e4 2173 else
b18610de 2174 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
1da177e4
LT
2175
2176 if (cmd != IP_VS_SO_SET_ADD
c860c6b1 2177 && (svc == NULL || svc->protocol != usvc.protocol)) {
1da177e4 2178 ret = -ESRCH;
b2aff963 2179 goto out_drop_service;
1da177e4
LT
2180 }
2181
2182 switch (cmd) {
2183 case IP_VS_SO_SET_ADD:
2184 if (svc != NULL)
2185 ret = -EEXIST;
2186 else
c860c6b1 2187 ret = ip_vs_add_service(&usvc, &svc);
1da177e4
LT
2188 break;
2189 case IP_VS_SO_SET_EDIT:
c860c6b1 2190 ret = ip_vs_edit_service(svc, &usvc);
1da177e4
LT
2191 break;
2192 case IP_VS_SO_SET_DEL:
2193 ret = ip_vs_del_service(svc);
2194 if (!ret)
2195 goto out_unlock;
2196 break;
2197 case IP_VS_SO_SET_ZERO:
2198 ret = ip_vs_zero_service(svc);
2199 break;
2200 case IP_VS_SO_SET_ADDDEST:
c860c6b1 2201 ret = ip_vs_add_dest(svc, &udest);
1da177e4
LT
2202 break;
2203 case IP_VS_SO_SET_EDITDEST:
c860c6b1 2204 ret = ip_vs_edit_dest(svc, &udest);
1da177e4
LT
2205 break;
2206 case IP_VS_SO_SET_DELDEST:
c860c6b1 2207 ret = ip_vs_del_dest(svc, &udest);
1da177e4
LT
2208 break;
2209 default:
2210 ret = -EINVAL;
2211 }
2212
b2aff963 2213out_drop_service:
1da177e4
LT
2214 if (svc)
2215 ip_vs_service_put(svc);
2216
2217 out_unlock:
14cc3e2b 2218 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2219 out_dec:
2220 /* decrease the module use count */
2221 ip_vs_use_count_dec();
2222
2223 return ret;
2224}
2225
2226
2227static void
2228ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2229{
2230 spin_lock_bh(&src->lock);
e9c0ce23 2231 memcpy(dst, &src->ustats, sizeof(*dst));
1da177e4
LT
2232 spin_unlock_bh(&src->lock);
2233}
2234
2235static void
2236ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2237{
2238 dst->protocol = src->protocol;
e7ade46a 2239 dst->addr = src->addr.ip;
1da177e4
LT
2240 dst->port = src->port;
2241 dst->fwmark = src->fwmark;
4da62fc7 2242 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
1da177e4
LT
2243 dst->flags = src->flags;
2244 dst->timeout = src->timeout / HZ;
2245 dst->netmask = src->netmask;
2246 dst->num_dests = src->num_dests;
2247 ip_vs_copy_stats(&dst->stats, &src->stats);
2248}
2249
2250static inline int
2251__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2252 struct ip_vs_get_services __user *uptr)
2253{
2254 int idx, count=0;
2255 struct ip_vs_service *svc;
2256 struct ip_vs_service_entry entry;
2257 int ret = 0;
2258
2259 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2260 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
f94fd041
JV
2261 /* Only expose IPv4 entries to old interface */
2262 if (svc->af != AF_INET)
2263 continue;
2264
1da177e4
LT
2265 if (count >= get->num_services)
2266 goto out;
4da62fc7 2267 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2268 ip_vs_copy_service(&entry, svc);
2269 if (copy_to_user(&uptr->entrytable[count],
2270 &entry, sizeof(entry))) {
2271 ret = -EFAULT;
2272 goto out;
2273 }
2274 count++;
2275 }
2276 }
2277
2278 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2279 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
f94fd041
JV
2280 /* Only expose IPv4 entries to old interface */
2281 if (svc->af != AF_INET)
2282 continue;
2283
1da177e4
LT
2284 if (count >= get->num_services)
2285 goto out;
4da62fc7 2286 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2287 ip_vs_copy_service(&entry, svc);
2288 if (copy_to_user(&uptr->entrytable[count],
2289 &entry, sizeof(entry))) {
2290 ret = -EFAULT;
2291 goto out;
2292 }
2293 count++;
2294 }
2295 }
2296 out:
2297 return ret;
2298}
2299
2300static inline int
2301__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2302 struct ip_vs_get_dests __user *uptr)
2303{
2304 struct ip_vs_service *svc;
b18610de 2305 union nf_inet_addr addr = { .ip = get->addr };
1da177e4
LT
2306 int ret = 0;
2307
2308 if (get->fwmark)
b18610de 2309 svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark);
1da177e4 2310 else
b18610de
JV
2311 svc = __ip_vs_service_get(AF_INET, get->protocol, &addr,
2312 get->port);
2313
1da177e4
LT
2314 if (svc) {
2315 int count = 0;
2316 struct ip_vs_dest *dest;
2317 struct ip_vs_dest_entry entry;
2318
2319 list_for_each_entry(dest, &svc->destinations, n_list) {
2320 if (count >= get->num_dests)
2321 break;
2322
e7ade46a 2323 entry.addr = dest->addr.ip;
1da177e4
LT
2324 entry.port = dest->port;
2325 entry.conn_flags = atomic_read(&dest->conn_flags);
2326 entry.weight = atomic_read(&dest->weight);
2327 entry.u_threshold = dest->u_threshold;
2328 entry.l_threshold = dest->l_threshold;
2329 entry.activeconns = atomic_read(&dest->activeconns);
2330 entry.inactconns = atomic_read(&dest->inactconns);
2331 entry.persistconns = atomic_read(&dest->persistconns);
2332 ip_vs_copy_stats(&entry.stats, &dest->stats);
2333 if (copy_to_user(&uptr->entrytable[count],
2334 &entry, sizeof(entry))) {
2335 ret = -EFAULT;
2336 break;
2337 }
2338 count++;
2339 }
2340 ip_vs_service_put(svc);
2341 } else
2342 ret = -ESRCH;
2343 return ret;
2344}
2345
2346static inline void
2347__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2348{
2349#ifdef CONFIG_IP_VS_PROTO_TCP
2350 u->tcp_timeout =
2351 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2352 u->tcp_fin_timeout =
2353 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2354#endif
2355#ifdef CONFIG_IP_VS_PROTO_UDP
2356 u->udp_timeout =
2357 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2358#endif
2359}
2360
2361
2362#define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2363#define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2364#define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2365#define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2366#define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2367#define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2368#define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2369
9b5b5cff 2370static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
1da177e4
LT
2371 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2372 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2373 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2374 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2375 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2376 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2377 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2378};
2379
2380static int
2381do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2382{
2383 unsigned char arg[128];
2384 int ret = 0;
04bcef2a 2385 unsigned int copylen;
1da177e4
LT
2386
2387 if (!capable(CAP_NET_ADMIN))
2388 return -EPERM;
2389
04bcef2a
AV
2390 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2391 return -EINVAL;
2392
1da177e4 2393 if (*len < get_arglen[GET_CMDID(cmd)]) {
1e3e238e
HE
2394 pr_err("get_ctl: len %u < %u\n",
2395 *len, get_arglen[GET_CMDID(cmd)]);
1da177e4
LT
2396 return -EINVAL;
2397 }
2398
04bcef2a
AV
2399 copylen = get_arglen[GET_CMDID(cmd)];
2400 if (copylen > 128)
2401 return -EINVAL;
2402
2403 if (copy_from_user(arg, user, copylen) != 0)
1da177e4
LT
2404 return -EFAULT;
2405
14cc3e2b 2406 if (mutex_lock_interruptible(&__ip_vs_mutex))
1da177e4
LT
2407 return -ERESTARTSYS;
2408
2409 switch (cmd) {
2410 case IP_VS_SO_GET_VERSION:
2411 {
2412 char buf[64];
2413
2414 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
6f7edb48 2415 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1da177e4
LT
2416 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2417 ret = -EFAULT;
2418 goto out;
2419 }
2420 *len = strlen(buf)+1;
2421 }
2422 break;
2423
2424 case IP_VS_SO_GET_INFO:
2425 {
2426 struct ip_vs_getinfo info;
2427 info.version = IP_VS_VERSION_CODE;
6f7edb48 2428 info.size = ip_vs_conn_tab_size;
1da177e4
LT
2429 info.num_services = ip_vs_num_services;
2430 if (copy_to_user(user, &info, sizeof(info)) != 0)
2431 ret = -EFAULT;
2432 }
2433 break;
2434
2435 case IP_VS_SO_GET_SERVICES:
2436 {
2437 struct ip_vs_get_services *get;
2438 int size;
2439
2440 get = (struct ip_vs_get_services *)arg;
2441 size = sizeof(*get) +
2442 sizeof(struct ip_vs_service_entry) * get->num_services;
2443 if (*len != size) {
1e3e238e 2444 pr_err("length: %u != %u\n", *len, size);
1da177e4
LT
2445 ret = -EINVAL;
2446 goto out;
2447 }
2448 ret = __ip_vs_get_service_entries(get, user);
2449 }
2450 break;
2451
2452 case IP_VS_SO_GET_SERVICE:
2453 {
2454 struct ip_vs_service_entry *entry;
2455 struct ip_vs_service *svc;
b18610de 2456 union nf_inet_addr addr;
1da177e4
LT
2457
2458 entry = (struct ip_vs_service_entry *)arg;
b18610de 2459 addr.ip = entry->addr;
1da177e4 2460 if (entry->fwmark)
b18610de 2461 svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark);
1da177e4 2462 else
b18610de
JV
2463 svc = __ip_vs_service_get(AF_INET, entry->protocol,
2464 &addr, entry->port);
1da177e4
LT
2465 if (svc) {
2466 ip_vs_copy_service(entry, svc);
2467 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2468 ret = -EFAULT;
2469 ip_vs_service_put(svc);
2470 } else
2471 ret = -ESRCH;
2472 }
2473 break;
2474
2475 case IP_VS_SO_GET_DESTS:
2476 {
2477 struct ip_vs_get_dests *get;
2478 int size;
2479
2480 get = (struct ip_vs_get_dests *)arg;
2481 size = sizeof(*get) +
2482 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2483 if (*len != size) {
1e3e238e 2484 pr_err("length: %u != %u\n", *len, size);
1da177e4
LT
2485 ret = -EINVAL;
2486 goto out;
2487 }
2488 ret = __ip_vs_get_dest_entries(get, user);
2489 }
2490 break;
2491
2492 case IP_VS_SO_GET_TIMEOUT:
2493 {
2494 struct ip_vs_timeout_user t;
2495
2496 __ip_vs_get_timeouts(&t);
2497 if (copy_to_user(user, &t, sizeof(t)) != 0)
2498 ret = -EFAULT;
2499 }
2500 break;
2501
2502 case IP_VS_SO_GET_DAEMON:
2503 {
2504 struct ip_vs_daemon_user d[2];
2505
2506 memset(&d, 0, sizeof(d));
2507 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2508 d[0].state = IP_VS_STATE_MASTER;
4da62fc7 2509 strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
1da177e4
LT
2510 d[0].syncid = ip_vs_master_syncid;
2511 }
2512 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2513 d[1].state = IP_VS_STATE_BACKUP;
4da62fc7 2514 strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
1da177e4
LT
2515 d[1].syncid = ip_vs_backup_syncid;
2516 }
2517 if (copy_to_user(user, &d, sizeof(d)) != 0)
2518 ret = -EFAULT;
2519 }
2520 break;
2521
2522 default:
2523 ret = -EINVAL;
2524 }
2525
2526 out:
14cc3e2b 2527 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2528 return ret;
2529}
2530
2531
2532static struct nf_sockopt_ops ip_vs_sockopts = {
2533 .pf = PF_INET,
2534 .set_optmin = IP_VS_BASE_CTL,
2535 .set_optmax = IP_VS_SO_SET_MAX+1,
2536 .set = do_ip_vs_set_ctl,
2537 .get_optmin = IP_VS_BASE_CTL,
2538 .get_optmax = IP_VS_SO_GET_MAX+1,
2539 .get = do_ip_vs_get_ctl,
16fcec35 2540 .owner = THIS_MODULE,
1da177e4
LT
2541};
2542
9a812198
JV
2543/*
2544 * Generic Netlink interface
2545 */
2546
2547/* IPVS genetlink family */
2548static struct genl_family ip_vs_genl_family = {
2549 .id = GENL_ID_GENERATE,
2550 .hdrsize = 0,
2551 .name = IPVS_GENL_NAME,
2552 .version = IPVS_GENL_VERSION,
2553 .maxattr = IPVS_CMD_MAX,
2554};
2555
2556/* Policy used for first-level command attributes */
2557static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2558 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2559 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2560 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2561 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2562 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2563 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2564};
2565
2566/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2567static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2568 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2569 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2570 .len = IP_VS_IFNAME_MAXLEN },
2571 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2572};
2573
2574/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2575static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2576 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2577 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2578 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2579 .len = sizeof(union nf_inet_addr) },
2580 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2581 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2582 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2583 .len = IP_VS_SCHEDNAME_MAXLEN },
2584 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2585 .len = sizeof(struct ip_vs_flags) },
2586 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2587 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2588 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2589};
2590
2591/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2592static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2593 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2594 .len = sizeof(union nf_inet_addr) },
2595 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2596 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2597 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2598 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2599 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2600 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2601 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2602 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2603 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
2604};
2605
2606static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2607 struct ip_vs_stats *stats)
2608{
2609 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2610 if (!nl_stats)
2611 return -EMSGSIZE;
2612
2613 spin_lock_bh(&stats->lock);
2614
e9c0ce23
SW
2615 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
2616 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
2617 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
2618 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
2619 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
2620 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
2621 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
2622 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
2623 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
2624 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
9a812198
JV
2625
2626 spin_unlock_bh(&stats->lock);
2627
2628 nla_nest_end(skb, nl_stats);
2629
2630 return 0;
2631
2632nla_put_failure:
2633 spin_unlock_bh(&stats->lock);
2634 nla_nest_cancel(skb, nl_stats);
2635 return -EMSGSIZE;
2636}
2637
2638static int ip_vs_genl_fill_service(struct sk_buff *skb,
2639 struct ip_vs_service *svc)
2640{
2641 struct nlattr *nl_service;
2642 struct ip_vs_flags flags = { .flags = svc->flags,
2643 .mask = ~0 };
2644
2645 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2646 if (!nl_service)
2647 return -EMSGSIZE;
2648
f94fd041 2649 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
9a812198
JV
2650
2651 if (svc->fwmark) {
2652 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2653 } else {
2654 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2655 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2656 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2657 }
2658
2659 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2660 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2661 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2662 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2663
2664 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2665 goto nla_put_failure;
2666
2667 nla_nest_end(skb, nl_service);
2668
2669 return 0;
2670
2671nla_put_failure:
2672 nla_nest_cancel(skb, nl_service);
2673 return -EMSGSIZE;
2674}
2675
2676static int ip_vs_genl_dump_service(struct sk_buff *skb,
2677 struct ip_vs_service *svc,
2678 struct netlink_callback *cb)
2679{
2680 void *hdr;
2681
2682 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2683 &ip_vs_genl_family, NLM_F_MULTI,
2684 IPVS_CMD_NEW_SERVICE);
2685 if (!hdr)
2686 return -EMSGSIZE;
2687
2688 if (ip_vs_genl_fill_service(skb, svc) < 0)
2689 goto nla_put_failure;
2690
2691 return genlmsg_end(skb, hdr);
2692
2693nla_put_failure:
2694 genlmsg_cancel(skb, hdr);
2695 return -EMSGSIZE;
2696}
2697
2698static int ip_vs_genl_dump_services(struct sk_buff *skb,
2699 struct netlink_callback *cb)
2700{
2701 int idx = 0, i;
2702 int start = cb->args[0];
2703 struct ip_vs_service *svc;
2704
2705 mutex_lock(&__ip_vs_mutex);
2706 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2707 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2708 if (++idx <= start)
2709 continue;
2710 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2711 idx--;
2712 goto nla_put_failure;
2713 }
2714 }
2715 }
2716
2717 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2718 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2719 if (++idx <= start)
2720 continue;
2721 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2722 idx--;
2723 goto nla_put_failure;
2724 }
2725 }
2726 }
2727
2728nla_put_failure:
2729 mutex_unlock(&__ip_vs_mutex);
2730 cb->args[0] = idx;
2731
2732 return skb->len;
2733}
2734
c860c6b1 2735static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
9a812198
JV
2736 struct nlattr *nla, int full_entry)
2737{
2738 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2739 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2740
2741 /* Parse mandatory identifying service fields first */
2742 if (nla == NULL ||
2743 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2744 return -EINVAL;
2745
2746 nla_af = attrs[IPVS_SVC_ATTR_AF];
2747 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2748 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2749 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2750 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2751
2752 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2753 return -EINVAL;
2754
258c8893
SH
2755 memset(usvc, 0, sizeof(*usvc));
2756
c860c6b1 2757 usvc->af = nla_get_u16(nla_af);
f94fd041
JV
2758#ifdef CONFIG_IP_VS_IPV6
2759 if (usvc->af != AF_INET && usvc->af != AF_INET6)
2760#else
2761 if (usvc->af != AF_INET)
2762#endif
9a812198
JV
2763 return -EAFNOSUPPORT;
2764
2765 if (nla_fwmark) {
2766 usvc->protocol = IPPROTO_TCP;
2767 usvc->fwmark = nla_get_u32(nla_fwmark);
2768 } else {
2769 usvc->protocol = nla_get_u16(nla_protocol);
2770 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2771 usvc->port = nla_get_u16(nla_port);
2772 usvc->fwmark = 0;
2773 }
2774
2775 /* If a full entry was requested, check for the additional fields */
2776 if (full_entry) {
2777 struct nlattr *nla_sched, *nla_flags, *nla_timeout,
2778 *nla_netmask;
2779 struct ip_vs_flags flags;
2780 struct ip_vs_service *svc;
2781
2782 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2783 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2784 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2785 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2786
2787 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2788 return -EINVAL;
2789
2790 nla_memcpy(&flags, nla_flags, sizeof(flags));
2791
2792 /* prefill flags from service if it already exists */
2793 if (usvc->fwmark)
b18610de 2794 svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark);
9a812198 2795 else
b18610de
JV
2796 svc = __ip_vs_service_get(usvc->af, usvc->protocol,
2797 &usvc->addr, usvc->port);
9a812198
JV
2798 if (svc) {
2799 usvc->flags = svc->flags;
2800 ip_vs_service_put(svc);
2801 } else
2802 usvc->flags = 0;
2803
2804 /* set new flags from userland */
2805 usvc->flags = (usvc->flags & ~flags.mask) |
2806 (flags.flags & flags.mask);
c860c6b1 2807 usvc->sched_name = nla_data(nla_sched);
9a812198
JV
2808 usvc->timeout = nla_get_u32(nla_timeout);
2809 usvc->netmask = nla_get_u32(nla_netmask);
2810 }
2811
2812 return 0;
2813}
2814
2815static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2816{
c860c6b1 2817 struct ip_vs_service_user_kern usvc;
9a812198
JV
2818 int ret;
2819
2820 ret = ip_vs_genl_parse_service(&usvc, nla, 0);
2821 if (ret)
2822 return ERR_PTR(ret);
2823
2824 if (usvc.fwmark)
b18610de 2825 return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
9a812198 2826 else
b18610de
JV
2827 return __ip_vs_service_get(usvc.af, usvc.protocol,
2828 &usvc.addr, usvc.port);
9a812198
JV
2829}
2830
2831static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2832{
2833 struct nlattr *nl_dest;
2834
2835 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2836 if (!nl_dest)
2837 return -EMSGSIZE;
2838
2839 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2840 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2841
2842 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2843 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2844 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2845 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2846 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2847 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2848 atomic_read(&dest->activeconns));
2849 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2850 atomic_read(&dest->inactconns));
2851 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2852 atomic_read(&dest->persistconns));
2853
2854 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2855 goto nla_put_failure;
2856
2857 nla_nest_end(skb, nl_dest);
2858
2859 return 0;
2860
2861nla_put_failure:
2862 nla_nest_cancel(skb, nl_dest);
2863 return -EMSGSIZE;
2864}
2865
2866static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2867 struct netlink_callback *cb)
2868{
2869 void *hdr;
2870
2871 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2872 &ip_vs_genl_family, NLM_F_MULTI,
2873 IPVS_CMD_NEW_DEST);
2874 if (!hdr)
2875 return -EMSGSIZE;
2876
2877 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2878 goto nla_put_failure;
2879
2880 return genlmsg_end(skb, hdr);
2881
2882nla_put_failure:
2883 genlmsg_cancel(skb, hdr);
2884 return -EMSGSIZE;
2885}
2886
2887static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2888 struct netlink_callback *cb)
2889{
2890 int idx = 0;
2891 int start = cb->args[0];
2892 struct ip_vs_service *svc;
2893 struct ip_vs_dest *dest;
2894 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2895
2896 mutex_lock(&__ip_vs_mutex);
2897
2898 /* Try to find the service for which to dump destinations */
2899 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2900 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2901 goto out_err;
2902
2903 svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
2904 if (IS_ERR(svc) || svc == NULL)
2905 goto out_err;
2906
2907 /* Dump the destinations */
2908 list_for_each_entry(dest, &svc->destinations, n_list) {
2909 if (++idx <= start)
2910 continue;
2911 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2912 idx--;
2913 goto nla_put_failure;
2914 }
2915 }
2916
2917nla_put_failure:
2918 cb->args[0] = idx;
2919 ip_vs_service_put(svc);
2920
2921out_err:
2922 mutex_unlock(&__ip_vs_mutex);
2923
2924 return skb->len;
2925}
2926
c860c6b1 2927static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
9a812198
JV
2928 struct nlattr *nla, int full_entry)
2929{
2930 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2931 struct nlattr *nla_addr, *nla_port;
2932
2933 /* Parse mandatory identifying destination fields first */
2934 if (nla == NULL ||
2935 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2936 return -EINVAL;
2937
2938 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
2939 nla_port = attrs[IPVS_DEST_ATTR_PORT];
2940
2941 if (!(nla_addr && nla_port))
2942 return -EINVAL;
2943
258c8893
SH
2944 memset(udest, 0, sizeof(*udest));
2945
9a812198
JV
2946 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2947 udest->port = nla_get_u16(nla_port);
2948
2949 /* If a full entry was requested, check for the additional fields */
2950 if (full_entry) {
2951 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2952 *nla_l_thresh;
2953
2954 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
2955 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
2956 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
2957 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
2958
2959 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
2960 return -EINVAL;
2961
2962 udest->conn_flags = nla_get_u32(nla_fwd)
2963 & IP_VS_CONN_F_FWD_MASK;
2964 udest->weight = nla_get_u32(nla_weight);
2965 udest->u_threshold = nla_get_u32(nla_u_thresh);
2966 udest->l_threshold = nla_get_u32(nla_l_thresh);
2967 }
2968
2969 return 0;
2970}
2971
2972static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
2973 const char *mcast_ifn, __be32 syncid)
2974{
2975 struct nlattr *nl_daemon;
2976
2977 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
2978 if (!nl_daemon)
2979 return -EMSGSIZE;
2980
2981 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
2982 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
2983 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
2984
2985 nla_nest_end(skb, nl_daemon);
2986
2987 return 0;
2988
2989nla_put_failure:
2990 nla_nest_cancel(skb, nl_daemon);
2991 return -EMSGSIZE;
2992}
2993
2994static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
2995 const char *mcast_ifn, __be32 syncid,
2996 struct netlink_callback *cb)
2997{
2998 void *hdr;
2999 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3000 &ip_vs_genl_family, NLM_F_MULTI,
3001 IPVS_CMD_NEW_DAEMON);
3002 if (!hdr)
3003 return -EMSGSIZE;
3004
3005 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3006 goto nla_put_failure;
3007
3008 return genlmsg_end(skb, hdr);
3009
3010nla_put_failure:
3011 genlmsg_cancel(skb, hdr);
3012 return -EMSGSIZE;
3013}
3014
3015static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3016 struct netlink_callback *cb)
3017{
3018 mutex_lock(&__ip_vs_mutex);
3019 if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
3020 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
3021 ip_vs_master_mcast_ifn,
3022 ip_vs_master_syncid, cb) < 0)
3023 goto nla_put_failure;
3024
3025 cb->args[0] = 1;
3026 }
3027
3028 if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3029 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3030 ip_vs_backup_mcast_ifn,
3031 ip_vs_backup_syncid, cb) < 0)
3032 goto nla_put_failure;
3033
3034 cb->args[1] = 1;
3035 }
3036
3037nla_put_failure:
3038 mutex_unlock(&__ip_vs_mutex);
3039
3040 return skb->len;
3041}
3042
3043static int ip_vs_genl_new_daemon(struct nlattr **attrs)
3044{
3045 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3046 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3047 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3048 return -EINVAL;
3049
3050 return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3051 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3052 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3053}
3054
3055static int ip_vs_genl_del_daemon(struct nlattr **attrs)
3056{
3057 if (!attrs[IPVS_DAEMON_ATTR_STATE])
3058 return -EINVAL;
3059
3060 return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3061}
3062
3063static int ip_vs_genl_set_config(struct nlattr **attrs)
3064{
3065 struct ip_vs_timeout_user t;
3066
3067 __ip_vs_get_timeouts(&t);
3068
3069 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3070 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3071
3072 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3073 t.tcp_fin_timeout =
3074 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3075
3076 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3077 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3078
3079 return ip_vs_set_timeout(&t);
3080}
3081
3082static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3083{
3084 struct ip_vs_service *svc = NULL;
c860c6b1
JV
3085 struct ip_vs_service_user_kern usvc;
3086 struct ip_vs_dest_user_kern udest;
9a812198
JV
3087 int ret = 0, cmd;
3088 int need_full_svc = 0, need_full_dest = 0;
3089
3090 cmd = info->genlhdr->cmd;
3091
3092 mutex_lock(&__ip_vs_mutex);
3093
3094 if (cmd == IPVS_CMD_FLUSH) {
3095 ret = ip_vs_flush();
3096 goto out;
3097 } else if (cmd == IPVS_CMD_SET_CONFIG) {
3098 ret = ip_vs_genl_set_config(info->attrs);
3099 goto out;
3100 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3101 cmd == IPVS_CMD_DEL_DAEMON) {
3102
3103 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3104
3105 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3106 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3107 info->attrs[IPVS_CMD_ATTR_DAEMON],
3108 ip_vs_daemon_policy)) {
3109 ret = -EINVAL;
3110 goto out;
3111 }
3112
3113 if (cmd == IPVS_CMD_NEW_DAEMON)
3114 ret = ip_vs_genl_new_daemon(daemon_attrs);
3115 else
3116 ret = ip_vs_genl_del_daemon(daemon_attrs);
3117 goto out;
3118 } else if (cmd == IPVS_CMD_ZERO &&
3119 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3120 ret = ip_vs_zero_all();
3121 goto out;
3122 }
3123
3124 /* All following commands require a service argument, so check if we
3125 * received a valid one. We need a full service specification when
3126 * adding / editing a service. Only identifying members otherwise. */
3127 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3128 need_full_svc = 1;
3129
3130 ret = ip_vs_genl_parse_service(&usvc,
3131 info->attrs[IPVS_CMD_ATTR_SERVICE],
3132 need_full_svc);
3133 if (ret)
3134 goto out;
3135
3136 /* Lookup the exact service by <protocol, addr, port> or fwmark */
3137 if (usvc.fwmark == 0)
b18610de
JV
3138 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
3139 &usvc.addr, usvc.port);
9a812198 3140 else
b18610de 3141 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
9a812198
JV
3142
3143 /* Unless we're adding a new service, the service must already exist */
3144 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3145 ret = -ESRCH;
3146 goto out;
3147 }
3148
3149 /* Destination commands require a valid destination argument. For
3150 * adding / editing a destination, we need a full destination
3151 * specification. */
3152 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3153 cmd == IPVS_CMD_DEL_DEST) {
3154 if (cmd != IPVS_CMD_DEL_DEST)
3155 need_full_dest = 1;
3156
3157 ret = ip_vs_genl_parse_dest(&udest,
3158 info->attrs[IPVS_CMD_ATTR_DEST],
3159 need_full_dest);
3160 if (ret)
3161 goto out;
3162 }
3163
3164 switch (cmd) {
3165 case IPVS_CMD_NEW_SERVICE:
3166 if (svc == NULL)
3167 ret = ip_vs_add_service(&usvc, &svc);
3168 else
3169 ret = -EEXIST;
3170 break;
3171 case IPVS_CMD_SET_SERVICE:
3172 ret = ip_vs_edit_service(svc, &usvc);
3173 break;
3174 case IPVS_CMD_DEL_SERVICE:
3175 ret = ip_vs_del_service(svc);
3176 break;
3177 case IPVS_CMD_NEW_DEST:
3178 ret = ip_vs_add_dest(svc, &udest);
3179 break;
3180 case IPVS_CMD_SET_DEST:
3181 ret = ip_vs_edit_dest(svc, &udest);
3182 break;
3183 case IPVS_CMD_DEL_DEST:
3184 ret = ip_vs_del_dest(svc, &udest);
3185 break;
3186 case IPVS_CMD_ZERO:
3187 ret = ip_vs_zero_service(svc);
3188 break;
3189 default:
3190 ret = -EINVAL;
3191 }
3192
3193out:
3194 if (svc)
3195 ip_vs_service_put(svc);
3196 mutex_unlock(&__ip_vs_mutex);
3197
3198 return ret;
3199}
3200
3201static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3202{
3203 struct sk_buff *msg;
3204 void *reply;
3205 int ret, cmd, reply_cmd;
3206
3207 cmd = info->genlhdr->cmd;
3208
3209 if (cmd == IPVS_CMD_GET_SERVICE)
3210 reply_cmd = IPVS_CMD_NEW_SERVICE;
3211 else if (cmd == IPVS_CMD_GET_INFO)
3212 reply_cmd = IPVS_CMD_SET_INFO;
3213 else if (cmd == IPVS_CMD_GET_CONFIG)
3214 reply_cmd = IPVS_CMD_SET_CONFIG;
3215 else {
1e3e238e 3216 pr_err("unknown Generic Netlink command\n");
9a812198
JV
3217 return -EINVAL;
3218 }
3219
3220 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3221 if (!msg)
3222 return -ENOMEM;
3223
3224 mutex_lock(&__ip_vs_mutex);
3225
3226 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3227 if (reply == NULL)
3228 goto nla_put_failure;
3229
3230 switch (cmd) {
3231 case IPVS_CMD_GET_SERVICE:
3232 {
3233 struct ip_vs_service *svc;
3234
3235 svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
3236 if (IS_ERR(svc)) {
3237 ret = PTR_ERR(svc);
3238 goto out_err;
3239 } else if (svc) {
3240 ret = ip_vs_genl_fill_service(msg, svc);
3241 ip_vs_service_put(svc);
3242 if (ret)
3243 goto nla_put_failure;
3244 } else {
3245 ret = -ESRCH;
3246 goto out_err;
3247 }
3248
3249 break;
3250 }
3251
3252 case IPVS_CMD_GET_CONFIG:
3253 {
3254 struct ip_vs_timeout_user t;
3255
3256 __ip_vs_get_timeouts(&t);
3257#ifdef CONFIG_IP_VS_PROTO_TCP
3258 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3259 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3260 t.tcp_fin_timeout);
3261#endif
3262#ifdef CONFIG_IP_VS_PROTO_UDP
3263 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3264#endif
3265
3266 break;
3267 }
3268
3269 case IPVS_CMD_GET_INFO:
3270 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3271 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
6f7edb48 3272 ip_vs_conn_tab_size);
9a812198
JV
3273 break;
3274 }
3275
3276 genlmsg_end(msg, reply);
134e6375 3277 ret = genlmsg_reply(msg, info);
9a812198
JV
3278 goto out;
3279
3280nla_put_failure:
1e3e238e 3281 pr_err("not enough space in Netlink message\n");
9a812198
JV
3282 ret = -EMSGSIZE;
3283
3284out_err:
3285 nlmsg_free(msg);
3286out:
3287 mutex_unlock(&__ip_vs_mutex);
3288
3289 return ret;
3290}
3291
3292
3293static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3294 {
3295 .cmd = IPVS_CMD_NEW_SERVICE,
3296 .flags = GENL_ADMIN_PERM,
3297 .policy = ip_vs_cmd_policy,
3298 .doit = ip_vs_genl_set_cmd,
3299 },
3300 {
3301 .cmd = IPVS_CMD_SET_SERVICE,
3302 .flags = GENL_ADMIN_PERM,
3303 .policy = ip_vs_cmd_policy,
3304 .doit = ip_vs_genl_set_cmd,
3305 },
3306 {
3307 .cmd = IPVS_CMD_DEL_SERVICE,
3308 .flags = GENL_ADMIN_PERM,
3309 .policy = ip_vs_cmd_policy,
3310 .doit = ip_vs_genl_set_cmd,
3311 },
3312 {
3313 .cmd = IPVS_CMD_GET_SERVICE,
3314 .flags = GENL_ADMIN_PERM,
3315 .doit = ip_vs_genl_get_cmd,
3316 .dumpit = ip_vs_genl_dump_services,
3317 .policy = ip_vs_cmd_policy,
3318 },
3319 {
3320 .cmd = IPVS_CMD_NEW_DEST,
3321 .flags = GENL_ADMIN_PERM,
3322 .policy = ip_vs_cmd_policy,
3323 .doit = ip_vs_genl_set_cmd,
3324 },
3325 {
3326 .cmd = IPVS_CMD_SET_DEST,
3327 .flags = GENL_ADMIN_PERM,
3328 .policy = ip_vs_cmd_policy,
3329 .doit = ip_vs_genl_set_cmd,
3330 },
3331 {
3332 .cmd = IPVS_CMD_DEL_DEST,
3333 .flags = GENL_ADMIN_PERM,
3334 .policy = ip_vs_cmd_policy,
3335 .doit = ip_vs_genl_set_cmd,
3336 },
3337 {
3338 .cmd = IPVS_CMD_GET_DEST,
3339 .flags = GENL_ADMIN_PERM,
3340 .policy = ip_vs_cmd_policy,
3341 .dumpit = ip_vs_genl_dump_dests,
3342 },
3343 {
3344 .cmd = IPVS_CMD_NEW_DAEMON,
3345 .flags = GENL_ADMIN_PERM,
3346 .policy = ip_vs_cmd_policy,
3347 .doit = ip_vs_genl_set_cmd,
3348 },
3349 {
3350 .cmd = IPVS_CMD_DEL_DAEMON,
3351 .flags = GENL_ADMIN_PERM,
3352 .policy = ip_vs_cmd_policy,
3353 .doit = ip_vs_genl_set_cmd,
3354 },
3355 {
3356 .cmd = IPVS_CMD_GET_DAEMON,
3357 .flags = GENL_ADMIN_PERM,
3358 .dumpit = ip_vs_genl_dump_daemons,
3359 },
3360 {
3361 .cmd = IPVS_CMD_SET_CONFIG,
3362 .flags = GENL_ADMIN_PERM,
3363 .policy = ip_vs_cmd_policy,
3364 .doit = ip_vs_genl_set_cmd,
3365 },
3366 {
3367 .cmd = IPVS_CMD_GET_CONFIG,
3368 .flags = GENL_ADMIN_PERM,
3369 .doit = ip_vs_genl_get_cmd,
3370 },
3371 {
3372 .cmd = IPVS_CMD_GET_INFO,
3373 .flags = GENL_ADMIN_PERM,
3374 .doit = ip_vs_genl_get_cmd,
3375 },
3376 {
3377 .cmd = IPVS_CMD_ZERO,
3378 .flags = GENL_ADMIN_PERM,
3379 .policy = ip_vs_cmd_policy,
3380 .doit = ip_vs_genl_set_cmd,
3381 },
3382 {
3383 .cmd = IPVS_CMD_FLUSH,
3384 .flags = GENL_ADMIN_PERM,
3385 .doit = ip_vs_genl_set_cmd,
3386 },
3387};
3388
3389static int __init ip_vs_genl_register(void)
3390{
8f698d54
MM
3391 return genl_register_family_with_ops(&ip_vs_genl_family,
3392 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
9a812198
JV
3393}
3394
3395static void ip_vs_genl_unregister(void)
3396{
3397 genl_unregister_family(&ip_vs_genl_family);
3398}
3399
3400/* End of Generic Netlink interface definitions */
3401
1da177e4 3402
048cf48b 3403int __init ip_vs_control_init(void)
1da177e4
LT
3404{
3405 int ret;
3406 int idx;
3407
3408 EnterFunction(2);
3409
3410 ret = nf_register_sockopt(&ip_vs_sockopts);
3411 if (ret) {
1e3e238e 3412 pr_err("cannot register sockopt.\n");
1da177e4
LT
3413 return ret;
3414 }
3415
9a812198
JV
3416 ret = ip_vs_genl_register();
3417 if (ret) {
1e3e238e 3418 pr_err("cannot register Generic Netlink interface.\n");
9a812198
JV
3419 nf_unregister_sockopt(&ip_vs_sockopts);
3420 return ret;
3421 }
3422
457c4cbc
EB
3423 proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
3424 proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
1da177e4 3425
90754f8e 3426 sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
1da177e4
LT
3427
3428 /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3429 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3430 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3431 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3432 }
3433 for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) {
3434 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3435 }
3436
1da177e4
LT
3437 ip_vs_new_estimator(&ip_vs_stats);
3438
3439 /* Hook the defense timer */
3440 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3441
3442 LeaveFunction(2);
3443 return 0;
3444}
3445
3446
3447void ip_vs_control_cleanup(void)
3448{
3449 EnterFunction(2);
3450 ip_vs_trash_cleanup();
3451 cancel_rearming_delayed_work(&defense_work);
28e53bdd 3452 cancel_work_sync(&defense_work.work);
1da177e4
LT
3453 ip_vs_kill_estimator(&ip_vs_stats);
3454 unregister_sysctl_table(sysctl_header);
457c4cbc
EB
3455 proc_net_remove(&init_net, "ip_vs_stats");
3456 proc_net_remove(&init_net, "ip_vs");
9a812198 3457 ip_vs_genl_unregister();
1da177e4
LT
3458 nf_unregister_sockopt(&ip_vs_sockopts);
3459 LeaveFunction(2);
3460}