]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/ipvs/ip_vs_ctl.c
ipvs: Restrict sync message to 255 connections
[net-next-2.6.git] / net / ipv4 / ipvs / ip_vs_ctl.c
CommitLineData
1da177e4
LT
1/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
1da177e4
LT
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
21#include <linux/module.h>
22#include <linux/init.h>
23#include <linux/types.h>
4fc268d2 24#include <linux/capability.h>
1da177e4
LT
25#include <linux/fs.h>
26#include <linux/sysctl.h>
27#include <linux/proc_fs.h>
28#include <linux/workqueue.h>
29#include <linux/swap.h>
1da177e4
LT
30#include <linux/seq_file.h>
31
32#include <linux/netfilter.h>
33#include <linux/netfilter_ipv4.h>
14cc3e2b 34#include <linux/mutex.h>
1da177e4 35
457c4cbc 36#include <net/net_namespace.h>
1da177e4 37#include <net/ip.h>
09571c7a
VB
38#ifdef CONFIG_IP_VS_IPV6
39#include <net/ipv6.h>
40#include <net/ip6_route.h>
41#endif
14c85021 42#include <net/route.h>
1da177e4 43#include <net/sock.h>
9a812198 44#include <net/genetlink.h>
1da177e4
LT
45
46#include <asm/uaccess.h>
47
48#include <net/ip_vs.h>
49
50/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
14cc3e2b 51static DEFINE_MUTEX(__ip_vs_mutex);
1da177e4
LT
52
53/* lock for service table */
54static DEFINE_RWLOCK(__ip_vs_svc_lock);
55
56/* lock for table with the real services */
57static DEFINE_RWLOCK(__ip_vs_rs_lock);
58
59/* lock for state and timeout tables */
60static DEFINE_RWLOCK(__ip_vs_securetcp_lock);
61
62/* lock for drop entry handling */
63static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
64
65/* lock for drop packet handling */
66static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
67
68/* 1/rate drop and drop-entry variables */
69int ip_vs_drop_rate = 0;
70int ip_vs_drop_counter = 0;
71static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
72
73/* number of virtual services */
74static int ip_vs_num_services = 0;
75
76/* sysctl variables */
77static int sysctl_ip_vs_drop_entry = 0;
78static int sysctl_ip_vs_drop_packet = 0;
79static int sysctl_ip_vs_secure_tcp = 0;
80static int sysctl_ip_vs_amemthresh = 1024;
81static int sysctl_ip_vs_am_droprate = 10;
82int sysctl_ip_vs_cache_bypass = 0;
83int sysctl_ip_vs_expire_nodest_conn = 0;
84int sysctl_ip_vs_expire_quiescent_template = 0;
85int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
86int sysctl_ip_vs_nat_icmp_send = 0;
87
88
#ifdef CONFIG_IP_VS_DEBUG
/* Debug level; only present when CONFIG_IP_VS_DEBUG is set. */
static int sysctl_ip_vs_debug_level;

/*
 *	Accessor for the debug level.
 *	Returns the current value of sysctl_ip_vs_debug_level.
 */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
#endif /* CONFIG_IP_VS_DEBUG */
97
09571c7a
VB
#ifdef CONFIG_IP_VS_IPV6
/*
 *	Check whether an IPv6 address is local to this host.
 *
 *	A route lookup for @addr is done in init_net; the address is treated
 *	as local when the resulting route has a device with IFF_LOOPBACK set.
 *	Returns 1 if local, 0 otherwise.
 *
 *	Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way?
 */
static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
{
	struct dst_entry *dst;
	struct rt6_info *rt;
	int is_local;
	struct flowi fl = {
		.oif = 0,
		.nl_u = {
			.ip6_u = {
				.daddr = *addr,
				.saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
	};

	dst = ip6_route_output(&init_net, NULL, &fl);
	if (!dst)
		return 0;

	rt = (struct rt6_info *)dst;
	is_local = rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK);

	/*
	 * The lookup hands back a referenced route; drop that reference
	 * once the device flags have been inspected, otherwise every call
	 * leaks one dst reference.
	 */
	dst_release(dst);

	return is_local;
}
#endif
1da177e4 118/*
af9debd4
JA
119 * update_defense_level is called from keventd and from sysctl,
120 * so it needs to protect itself from softirqs
1da177e4
LT
121 */
122static void update_defense_level(void)
123{
124 struct sysinfo i;
125 static int old_secure_tcp = 0;
126 int availmem;
127 int nomem;
128 int to_change = -1;
129
130 /* we only count free and buffered memory (in pages) */
131 si_meminfo(&i);
132 availmem = i.freeram + i.bufferram;
133 /* however in linux 2.5 the i.bufferram is total page cache size,
134 we need adjust it */
135 /* si_swapinfo(&i); */
136 /* availmem = availmem - (i.totalswap - i.freeswap); */
137
138 nomem = (availmem < sysctl_ip_vs_amemthresh);
139
af9debd4
JA
140 local_bh_disable();
141
1da177e4
LT
142 /* drop_entry */
143 spin_lock(&__ip_vs_dropentry_lock);
144 switch (sysctl_ip_vs_drop_entry) {
145 case 0:
146 atomic_set(&ip_vs_dropentry, 0);
147 break;
148 case 1:
149 if (nomem) {
150 atomic_set(&ip_vs_dropentry, 1);
151 sysctl_ip_vs_drop_entry = 2;
152 } else {
153 atomic_set(&ip_vs_dropentry, 0);
154 }
155 break;
156 case 2:
157 if (nomem) {
158 atomic_set(&ip_vs_dropentry, 1);
159 } else {
160 atomic_set(&ip_vs_dropentry, 0);
161 sysctl_ip_vs_drop_entry = 1;
162 };
163 break;
164 case 3:
165 atomic_set(&ip_vs_dropentry, 1);
166 break;
167 }
168 spin_unlock(&__ip_vs_dropentry_lock);
169
170 /* drop_packet */
171 spin_lock(&__ip_vs_droppacket_lock);
172 switch (sysctl_ip_vs_drop_packet) {
173 case 0:
174 ip_vs_drop_rate = 0;
175 break;
176 case 1:
177 if (nomem) {
178 ip_vs_drop_rate = ip_vs_drop_counter
179 = sysctl_ip_vs_amemthresh /
180 (sysctl_ip_vs_amemthresh-availmem);
181 sysctl_ip_vs_drop_packet = 2;
182 } else {
183 ip_vs_drop_rate = 0;
184 }
185 break;
186 case 2:
187 if (nomem) {
188 ip_vs_drop_rate = ip_vs_drop_counter
189 = sysctl_ip_vs_amemthresh /
190 (sysctl_ip_vs_amemthresh-availmem);
191 } else {
192 ip_vs_drop_rate = 0;
193 sysctl_ip_vs_drop_packet = 1;
194 }
195 break;
196 case 3:
197 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
198 break;
199 }
200 spin_unlock(&__ip_vs_droppacket_lock);
201
202 /* secure_tcp */
203 write_lock(&__ip_vs_securetcp_lock);
204 switch (sysctl_ip_vs_secure_tcp) {
205 case 0:
206 if (old_secure_tcp >= 2)
207 to_change = 0;
208 break;
209 case 1:
210 if (nomem) {
211 if (old_secure_tcp < 2)
212 to_change = 1;
213 sysctl_ip_vs_secure_tcp = 2;
214 } else {
215 if (old_secure_tcp >= 2)
216 to_change = 0;
217 }
218 break;
219 case 2:
220 if (nomem) {
221 if (old_secure_tcp < 2)
222 to_change = 1;
223 } else {
224 if (old_secure_tcp >= 2)
225 to_change = 0;
226 sysctl_ip_vs_secure_tcp = 1;
227 }
228 break;
229 case 3:
230 if (old_secure_tcp < 2)
231 to_change = 1;
232 break;
233 }
234 old_secure_tcp = sysctl_ip_vs_secure_tcp;
235 if (to_change >= 0)
236 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
237 write_unlock(&__ip_vs_securetcp_lock);
af9debd4
JA
238
239 local_bh_enable();
1da177e4
LT
240}
241
242
243/*
244 * Timer for checking the defense
245 */
246#define DEFENSE_TIMER_PERIOD 1*HZ
c4028958
DH
247static void defense_work_handler(struct work_struct *work);
248static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
1da177e4 249
c4028958 250static void defense_work_handler(struct work_struct *work)
1da177e4
LT
251{
252 update_defense_level();
253 if (atomic_read(&ip_vs_dropentry))
254 ip_vs_random_dropentry();
255
256 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
257}
258
259int
260ip_vs_use_count_inc(void)
261{
262 return try_module_get(THIS_MODULE);
263}
264
265void
266ip_vs_use_count_dec(void)
267{
268 module_put(THIS_MODULE);
269}
270
271
272/*
273 * Hash table: for virtual service lookups
274 */
275#define IP_VS_SVC_TAB_BITS 8
276#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
277#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
278
279/* the service table hashed by <protocol, addr, port> */
280static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
281/* the service table hashed by fwmark */
282static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
283
284/*
285 * Hash table: for real service lookups
286 */
287#define IP_VS_RTAB_BITS 4
288#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
289#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
290
291static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
292
293/*
294 * Trash for destinations
295 */
296static LIST_HEAD(ip_vs_dest_trash);
297
298/*
299 * FTP & NULL virtual service counters
300 */
301static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
302static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
303
304
305/*
306 * Returns hash value for virtual service
307 */
308static __inline__ unsigned
b18610de
JV
309ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
310 __be16 port)
1da177e4
LT
311{
312 register unsigned porth = ntohs(port);
b18610de 313 __be32 addr_fold = addr->ip;
1da177e4 314
b18610de
JV
315#ifdef CONFIG_IP_VS_IPV6
316 if (af == AF_INET6)
317 addr_fold = addr->ip6[0]^addr->ip6[1]^
318 addr->ip6[2]^addr->ip6[3];
319#endif
320
321 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
1da177e4
LT
322 & IP_VS_SVC_TAB_MASK;
323}
324
325/*
326 * Returns hash value of fwmark for virtual service lookup
327 */
328static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
329{
330 return fwmark & IP_VS_SVC_TAB_MASK;
331}
332
333/*
334 * Hashes a service in the ip_vs_svc_table by <proto,addr,port>
335 * or in the ip_vs_svc_fwm_table by fwmark.
336 * Should be called with locked tables.
337 */
338static int ip_vs_svc_hash(struct ip_vs_service *svc)
339{
340 unsigned hash;
341
342 if (svc->flags & IP_VS_SVC_F_HASHED) {
343 IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
344 "called from %p\n", __builtin_return_address(0));
345 return 0;
346 }
347
348 if (svc->fwmark == 0) {
349 /*
350 * Hash it by <protocol,addr,port> in ip_vs_svc_table
351 */
b18610de 352 hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
e7ade46a 353 svc->port);
1da177e4
LT
354 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
355 } else {
356 /*
357 * Hash it by fwmark in ip_vs_svc_fwm_table
358 */
359 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
360 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
361 }
362
363 svc->flags |= IP_VS_SVC_F_HASHED;
364 /* increase its refcnt because it is referenced by the svc table */
365 atomic_inc(&svc->refcnt);
366 return 1;
367}
368
369
370/*
371 * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
372 * Should be called with locked tables.
373 */
374static int ip_vs_svc_unhash(struct ip_vs_service *svc)
375{
376 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
377 IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
378 "called from %p\n", __builtin_return_address(0));
379 return 0;
380 }
381
382 if (svc->fwmark == 0) {
383 /* Remove it from the ip_vs_svc_table table */
384 list_del(&svc->s_list);
385 } else {
386 /* Remove it from the ip_vs_svc_fwm_table table */
387 list_del(&svc->f_list);
388 }
389
390 svc->flags &= ~IP_VS_SVC_F_HASHED;
391 atomic_dec(&svc->refcnt);
392 return 1;
393}
394
395
396/*
397 * Get service by {proto,addr,port} in the service table.
398 */
b18610de
JV
399static inline struct ip_vs_service *
400__ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
401 __be16 vport)
1da177e4
LT
402{
403 unsigned hash;
404 struct ip_vs_service *svc;
405
406 /* Check for "full" addressed entries */
b18610de 407 hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
1da177e4
LT
408
409 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
b18610de
JV
410 if ((svc->af == af)
411 && ip_vs_addr_equal(af, &svc->addr, vaddr)
1da177e4
LT
412 && (svc->port == vport)
413 && (svc->protocol == protocol)) {
414 /* HIT */
415 atomic_inc(&svc->usecnt);
416 return svc;
417 }
418 }
419
420 return NULL;
421}
422
423
424/*
425 * Get service by {fwmark} in the service table.
426 */
b18610de
JV
427static inline struct ip_vs_service *
428__ip_vs_svc_fwm_get(int af, __u32 fwmark)
1da177e4
LT
429{
430 unsigned hash;
431 struct ip_vs_service *svc;
432
433 /* Check for fwmark addressed entries */
434 hash = ip_vs_svc_fwm_hashkey(fwmark);
435
436 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
b18610de 437 if (svc->fwmark == fwmark && svc->af == af) {
1da177e4
LT
438 /* HIT */
439 atomic_inc(&svc->usecnt);
440 return svc;
441 }
442 }
443
444 return NULL;
445}
446
447struct ip_vs_service *
3c2e0505
JV
448ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
449 const union nf_inet_addr *vaddr, __be16 vport)
1da177e4
LT
450{
451 struct ip_vs_service *svc;
3c2e0505 452
1da177e4
LT
453 read_lock(&__ip_vs_svc_lock);
454
455 /*
456 * Check the table hashed by fwmark first
457 */
3c2e0505 458 if (fwmark && (svc = __ip_vs_svc_fwm_get(af, fwmark)))
1da177e4
LT
459 goto out;
460
461 /*
462 * Check the table hashed by <protocol,addr,port>
463 * for "full" addressed entries
464 */
3c2e0505 465 svc = __ip_vs_service_get(af, protocol, vaddr, vport);
1da177e4
LT
466
467 if (svc == NULL
468 && protocol == IPPROTO_TCP
469 && atomic_read(&ip_vs_ftpsvc_counter)
470 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
471 /*
472 * Check if ftp service entry exists, the packet
473 * might belong to FTP data connections.
474 */
3c2e0505 475 svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT);
1da177e4
LT
476 }
477
478 if (svc == NULL
479 && atomic_read(&ip_vs_nullsvc_counter)) {
480 /*
481 * Check if the catch-all port (port zero) exists
482 */
3c2e0505 483 svc = __ip_vs_service_get(af, protocol, vaddr, 0);
1da177e4
LT
484 }
485
486 out:
487 read_unlock(&__ip_vs_svc_lock);
488
3c2e0505
JV
489 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
490 fwmark, ip_vs_proto_name(protocol),
491 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
492 svc ? "hit" : "not hit");
1da177e4
LT
493
494 return svc;
495}
496
497
498static inline void
499__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
500{
501 atomic_inc(&svc->refcnt);
502 dest->svc = svc;
503}
504
505static inline void
506__ip_vs_unbind_svc(struct ip_vs_dest *dest)
507{
508 struct ip_vs_service *svc = dest->svc;
509
510 dest->svc = NULL;
511 if (atomic_dec_and_test(&svc->refcnt))
512 kfree(svc);
513}
514
515
516/*
517 * Returns hash value for real service
518 */
7937df15
JV
519static inline unsigned ip_vs_rs_hashkey(int af,
520 const union nf_inet_addr *addr,
521 __be16 port)
1da177e4
LT
522{
523 register unsigned porth = ntohs(port);
7937df15
JV
524 __be32 addr_fold = addr->ip;
525
526#ifdef CONFIG_IP_VS_IPV6
527 if (af == AF_INET6)
528 addr_fold = addr->ip6[0]^addr->ip6[1]^
529 addr->ip6[2]^addr->ip6[3];
530#endif
1da177e4 531
7937df15 532 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
1da177e4
LT
533 & IP_VS_RTAB_MASK;
534}
535
536/*
537 * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
538 * should be called with locked tables.
539 */
540static int ip_vs_rs_hash(struct ip_vs_dest *dest)
541{
542 unsigned hash;
543
544 if (!list_empty(&dest->d_list)) {
545 return 0;
546 }
547
548 /*
549 * Hash by proto,addr,port,
550 * which are the parameters of the real service.
551 */
7937df15
JV
552 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
553
1da177e4
LT
554 list_add(&dest->d_list, &ip_vs_rtable[hash]);
555
556 return 1;
557}
558
559/*
560 * UNhashes ip_vs_dest from ip_vs_rtable.
561 * should be called with locked tables.
562 */
563static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
564{
565 /*
566 * Remove it from the ip_vs_rtable table.
567 */
568 if (!list_empty(&dest->d_list)) {
569 list_del(&dest->d_list);
570 INIT_LIST_HEAD(&dest->d_list);
571 }
572
573 return 1;
574}
575
576/*
577 * Lookup real service by <proto,addr,port> in the real service table.
578 */
579struct ip_vs_dest *
7937df15
JV
580ip_vs_lookup_real_service(int af, __u16 protocol,
581 const union nf_inet_addr *daddr,
582 __be16 dport)
1da177e4
LT
583{
584 unsigned hash;
585 struct ip_vs_dest *dest;
586
587 /*
588 * Check for "full" addressed entries
589 * Return the first found entry
590 */
7937df15 591 hash = ip_vs_rs_hashkey(af, daddr, dport);
1da177e4
LT
592
593 read_lock(&__ip_vs_rs_lock);
594 list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
7937df15
JV
595 if ((dest->af == af)
596 && ip_vs_addr_equal(af, &dest->addr, daddr)
1da177e4
LT
597 && (dest->port == dport)
598 && ((dest->protocol == protocol) ||
599 dest->vfwmark)) {
600 /* HIT */
601 read_unlock(&__ip_vs_rs_lock);
602 return dest;
603 }
604 }
605 read_unlock(&__ip_vs_rs_lock);
606
607 return NULL;
608}
609
610/*
611 * Lookup destination by {addr,port} in the given service
612 */
613static struct ip_vs_dest *
7937df15
JV
614ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
615 __be16 dport)
1da177e4
LT
616{
617 struct ip_vs_dest *dest;
618
619 /*
620 * Find the destination for the given service
621 */
622 list_for_each_entry(dest, &svc->destinations, n_list) {
7937df15
JV
623 if ((dest->af == svc->af)
624 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
625 && (dest->port == dport)) {
1da177e4
LT
626 /* HIT */
627 return dest;
628 }
629 }
630
631 return NULL;
632}
633
1e356f9c
RB
634/*
635 * Find destination by {daddr,dport,vaddr,protocol}
636 * Cretaed to be used in ip_vs_process_message() in
637 * the backup synchronization daemon. It finds the
638 * destination to be bound to the received connection
639 * on the backup.
640 *
641 * ip_vs_lookup_real_service() looked promissing, but
642 * seems not working as expected.
643 */
7937df15
JV
644struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
645 __be16 dport,
646 const union nf_inet_addr *vaddr,
647 __be16 vport, __u16 protocol)
1e356f9c
RB
648{
649 struct ip_vs_dest *dest;
650 struct ip_vs_service *svc;
651
7937df15 652 svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
1e356f9c
RB
653 if (!svc)
654 return NULL;
655 dest = ip_vs_lookup_dest(svc, daddr, dport);
656 if (dest)
657 atomic_inc(&dest->refcnt);
658 ip_vs_service_put(svc);
659 return dest;
660}
1da177e4
LT
661
662/*
663 * Lookup dest by {svc,addr,port} in the destination trash.
664 * The destination trash is used to hold the destinations that are removed
665 * from the service table but are still referenced by some conn entries.
666 * The reason to add the destination trash is when the dest is temporary
667 * down (either by administrator or by monitor program), the dest can be
668 * picked back from the trash, the remaining connections to the dest can
669 * continue, and the counting information of the dest is also useful for
670 * scheduling.
671 */
672static struct ip_vs_dest *
7937df15
JV
673ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
674 __be16 dport)
1da177e4
LT
675{
676 struct ip_vs_dest *dest, *nxt;
677
678 /*
679 * Find the destination in trash
680 */
681 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
7937df15
JV
682 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
683 "dest->refcnt=%d\n",
684 dest->vfwmark,
685 IP_VS_DBG_ADDR(svc->af, &dest->addr),
686 ntohs(dest->port),
687 atomic_read(&dest->refcnt));
688 if (dest->af == svc->af &&
689 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
1da177e4
LT
690 dest->port == dport &&
691 dest->vfwmark == svc->fwmark &&
692 dest->protocol == svc->protocol &&
693 (svc->fwmark ||
7937df15 694 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
1da177e4
LT
695 dest->vport == svc->port))) {
696 /* HIT */
697 return dest;
698 }
699
700 /*
701 * Try to purge the destination from trash if not referenced
702 */
703 if (atomic_read(&dest->refcnt) == 1) {
7937df15
JV
704 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
705 "from trash\n",
706 dest->vfwmark,
707 IP_VS_DBG_ADDR(svc->af, &dest->addr),
708 ntohs(dest->port));
1da177e4
LT
709 list_del(&dest->n_list);
710 ip_vs_dst_reset(dest);
711 __ip_vs_unbind_svc(dest);
712 kfree(dest);
713 }
714 }
715
716 return NULL;
717}
718
719
720/*
721 * Clean up all the destinations in the trash
722 * Called by the ip_vs_control_cleanup()
723 *
724 * When the ip_vs_control_clearup is activated by ipvs module exit,
725 * the service tables must have been flushed and all the connections
726 * are expired, and the refcnt of each destination in the trash must
727 * be 1, so we simply release them here.
728 */
729static void ip_vs_trash_cleanup(void)
730{
731 struct ip_vs_dest *dest, *nxt;
732
733 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
734 list_del(&dest->n_list);
735 ip_vs_dst_reset(dest);
736 __ip_vs_unbind_svc(dest);
737 kfree(dest);
738 }
739}
740
741
742static void
743ip_vs_zero_stats(struct ip_vs_stats *stats)
744{
745 spin_lock_bh(&stats->lock);
e93615d0 746
e9c0ce23 747 memset(&stats->ustats, 0, sizeof(stats->ustats));
1da177e4 748 ip_vs_zero_estimator(stats);
e93615d0 749
3a14a313 750 spin_unlock_bh(&stats->lock);
1da177e4
LT
751}
752
753/*
754 * Update a destination in the given service
755 */
756static void
757__ip_vs_update_dest(struct ip_vs_service *svc,
c860c6b1 758 struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
759{
760 int conn_flags;
761
762 /* set the weight and the flags */
763 atomic_set(&dest->weight, udest->weight);
764 conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
765
766 /* check if local node and update the flags */
09571c7a
VB
767#ifdef CONFIG_IP_VS_IPV6
768 if (svc->af == AF_INET6) {
769 if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) {
770 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
771 | IP_VS_CONN_F_LOCALNODE;
772 }
773 } else
774#endif
775 if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
776 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
777 | IP_VS_CONN_F_LOCALNODE;
778 }
1da177e4
LT
779
780 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
781 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
782 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
783 } else {
784 /*
785 * Put the real service in ip_vs_rtable if not present.
786 * For now only for NAT!
787 */
788 write_lock_bh(&__ip_vs_rs_lock);
789 ip_vs_rs_hash(dest);
790 write_unlock_bh(&__ip_vs_rs_lock);
791 }
792 atomic_set(&dest->conn_flags, conn_flags);
793
794 /* bind the service */
795 if (!dest->svc) {
796 __ip_vs_bind_svc(dest, svc);
797 } else {
798 if (dest->svc != svc) {
799 __ip_vs_unbind_svc(dest);
800 ip_vs_zero_stats(&dest->stats);
801 __ip_vs_bind_svc(dest, svc);
802 }
803 }
804
805 /* set the dest status flags */
806 dest->flags |= IP_VS_DEST_F_AVAILABLE;
807
808 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
809 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
810 dest->u_threshold = udest->u_threshold;
811 dest->l_threshold = udest->l_threshold;
812}
813
814
815/*
816 * Create a destination for the given service
817 */
818static int
c860c6b1 819ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
1da177e4
LT
820 struct ip_vs_dest **dest_p)
821{
822 struct ip_vs_dest *dest;
823 unsigned atype;
824
825 EnterFunction(2);
826
09571c7a
VB
827#ifdef CONFIG_IP_VS_IPV6
828 if (svc->af == AF_INET6) {
829 atype = ipv6_addr_type(&udest->addr.in6);
3bfb92f4
SW
830 if ((!(atype & IPV6_ADDR_UNICAST) ||
831 atype & IPV6_ADDR_LINKLOCAL) &&
09571c7a
VB
832 !__ip_vs_addr_is_local_v6(&udest->addr.in6))
833 return -EINVAL;
834 } else
835#endif
836 {
837 atype = inet_addr_type(&init_net, udest->addr.ip);
838 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
839 return -EINVAL;
840 }
1da177e4 841
0da974f4 842 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
1da177e4
LT
843 if (dest == NULL) {
844 IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
845 return -ENOMEM;
846 }
1da177e4 847
c860c6b1 848 dest->af = svc->af;
1da177e4 849 dest->protocol = svc->protocol;
c860c6b1 850 dest->vaddr = svc->addr;
1da177e4
LT
851 dest->vport = svc->port;
852 dest->vfwmark = svc->fwmark;
c860c6b1 853 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
1da177e4
LT
854 dest->port = udest->port;
855
856 atomic_set(&dest->activeconns, 0);
857 atomic_set(&dest->inactconns, 0);
858 atomic_set(&dest->persistconns, 0);
859 atomic_set(&dest->refcnt, 0);
860
861 INIT_LIST_HEAD(&dest->d_list);
862 spin_lock_init(&dest->dst_lock);
863 spin_lock_init(&dest->stats.lock);
864 __ip_vs_update_dest(svc, dest, udest);
865 ip_vs_new_estimator(&dest->stats);
866
867 *dest_p = dest;
868
869 LeaveFunction(2);
870 return 0;
871}
872
873
874/*
875 * Add a destination into an existing service
876 */
877static int
c860c6b1 878ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
879{
880 struct ip_vs_dest *dest;
c860c6b1 881 union nf_inet_addr daddr;
014d730d 882 __be16 dport = udest->port;
1da177e4
LT
883 int ret;
884
885 EnterFunction(2);
886
887 if (udest->weight < 0) {
888 IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
889 return -ERANGE;
890 }
891
892 if (udest->l_threshold > udest->u_threshold) {
893 IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than "
894 "upper threshold\n");
895 return -ERANGE;
896 }
897
c860c6b1
JV
898 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
899
1da177e4
LT
900 /*
901 * Check if the dest already exists in the list
902 */
7937df15
JV
903 dest = ip_vs_lookup_dest(svc, &daddr, dport);
904
1da177e4
LT
905 if (dest != NULL) {
906 IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
907 return -EEXIST;
908 }
909
910 /*
911 * Check if the dest already exists in the trash and
912 * is from the same service
913 */
7937df15
JV
914 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
915
1da177e4 916 if (dest != NULL) {
cfc78c5a
JV
917 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
918 "dest->refcnt=%d, service %u/%s:%u\n",
919 IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
920 atomic_read(&dest->refcnt),
921 dest->vfwmark,
922 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
923 ntohs(dest->vport));
924
1da177e4
LT
925 __ip_vs_update_dest(svc, dest, udest);
926
927 /*
928 * Get the destination from the trash
929 */
930 list_del(&dest->n_list);
931
932 ip_vs_new_estimator(&dest->stats);
933
934 write_lock_bh(&__ip_vs_svc_lock);
935
936 /*
937 * Wait until all other svc users go away.
938 */
939 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
940
941 list_add(&dest->n_list, &svc->destinations);
942 svc->num_dests++;
943
944 /* call the update_service function of its scheduler */
82dfb6f3
SW
945 if (svc->scheduler->update_service)
946 svc->scheduler->update_service(svc);
1da177e4
LT
947
948 write_unlock_bh(&__ip_vs_svc_lock);
949 return 0;
950 }
951
952 /*
953 * Allocate and initialize the dest structure
954 */
955 ret = ip_vs_new_dest(svc, udest, &dest);
956 if (ret) {
957 return ret;
958 }
959
960 /*
961 * Add the dest entry into the list
962 */
963 atomic_inc(&dest->refcnt);
964
965 write_lock_bh(&__ip_vs_svc_lock);
966
967 /*
968 * Wait until all other svc users go away.
969 */
970 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
971
972 list_add(&dest->n_list, &svc->destinations);
973 svc->num_dests++;
974
975 /* call the update_service function of its scheduler */
82dfb6f3
SW
976 if (svc->scheduler->update_service)
977 svc->scheduler->update_service(svc);
1da177e4
LT
978
979 write_unlock_bh(&__ip_vs_svc_lock);
980
981 LeaveFunction(2);
982
983 return 0;
984}
985
986
987/*
988 * Edit a destination in the given service
989 */
990static int
c860c6b1 991ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
992{
993 struct ip_vs_dest *dest;
c860c6b1 994 union nf_inet_addr daddr;
014d730d 995 __be16 dport = udest->port;
1da177e4
LT
996
997 EnterFunction(2);
998
999 if (udest->weight < 0) {
1000 IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
1001 return -ERANGE;
1002 }
1003
1004 if (udest->l_threshold > udest->u_threshold) {
1005 IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than "
1006 "upper threshold\n");
1007 return -ERANGE;
1008 }
1009
c860c6b1
JV
1010 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
1011
1da177e4
LT
1012 /*
1013 * Lookup the destination list
1014 */
7937df15
JV
1015 dest = ip_vs_lookup_dest(svc, &daddr, dport);
1016
1da177e4
LT
1017 if (dest == NULL) {
1018 IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
1019 return -ENOENT;
1020 }
1021
1022 __ip_vs_update_dest(svc, dest, udest);
1023
1024 write_lock_bh(&__ip_vs_svc_lock);
1025
1026 /* Wait until all other svc users go away */
cae7ca3d 1027 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1da177e4
LT
1028
1029 /* call the update_service, because server weight may be changed */
82dfb6f3
SW
1030 if (svc->scheduler->update_service)
1031 svc->scheduler->update_service(svc);
1da177e4
LT
1032
1033 write_unlock_bh(&__ip_vs_svc_lock);
1034
1035 LeaveFunction(2);
1036
1037 return 0;
1038}
1039
1040
1041/*
1042 * Delete a destination (must be already unlinked from the service)
1043 */
1044static void __ip_vs_del_dest(struct ip_vs_dest *dest)
1045{
1046 ip_vs_kill_estimator(&dest->stats);
1047
1048 /*
1049 * Remove it from the d-linked list with the real services.
1050 */
1051 write_lock_bh(&__ip_vs_rs_lock);
1052 ip_vs_rs_unhash(dest);
1053 write_unlock_bh(&__ip_vs_rs_lock);
1054
1055 /*
1056 * Decrease the refcnt of the dest, and free the dest
1057 * if nobody refers to it (refcnt=0). Otherwise, throw
1058 * the destination into the trash.
1059 */
1060 if (atomic_dec_and_test(&dest->refcnt)) {
1061 ip_vs_dst_reset(dest);
1062 /* simply decrease svc->refcnt here, let the caller check
1063 and release the service if nobody refers to it.
1064 Only user context can release destination and service,
1065 and only one user context can update virtual service at a
1066 time, so the operation here is OK */
1067 atomic_dec(&dest->svc->refcnt);
1068 kfree(dest);
1069 } else {
cfc78c5a
JV
1070 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1071 "dest->refcnt=%d\n",
1072 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1073 ntohs(dest->port),
1074 atomic_read(&dest->refcnt));
1da177e4
LT
1075 list_add(&dest->n_list, &ip_vs_dest_trash);
1076 atomic_inc(&dest->refcnt);
1077 }
1078}
1079
1080
1081/*
1082 * Unlink a destination from the given service
1083 */
1084static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1085 struct ip_vs_dest *dest,
1086 int svcupd)
1087{
1088 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1089
1090 /*
1091 * Remove it from the d-linked destination list.
1092 */
1093 list_del(&dest->n_list);
1094 svc->num_dests--;
82dfb6f3
SW
1095
1096 /*
1097 * Call the update_service function of its scheduler
1098 */
1099 if (svcupd && svc->scheduler->update_service)
1100 svc->scheduler->update_service(svc);
1da177e4
LT
1101}
1102
1103
1104/*
1105 * Delete a destination server in the given service
1106 */
1107static int
c860c6b1 1108ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
1109{
1110 struct ip_vs_dest *dest;
014d730d 1111 __be16 dport = udest->port;
1da177e4
LT
1112
1113 EnterFunction(2);
1114
7937df15 1115 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
c860c6b1 1116
1da177e4
LT
1117 if (dest == NULL) {
1118 IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
1119 return -ENOENT;
1120 }
1121
1122 write_lock_bh(&__ip_vs_svc_lock);
1123
1124 /*
1125 * Wait until all other svc users go away.
1126 */
1127 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1128
1129 /*
1130 * Unlink dest from the service
1131 */
1132 __ip_vs_unlink_dest(svc, dest, 1);
1133
1134 write_unlock_bh(&__ip_vs_svc_lock);
1135
1136 /*
1137 * Delete the destination
1138 */
1139 __ip_vs_del_dest(dest);
1140
1141 LeaveFunction(2);
1142
1143 return 0;
1144}
1145
1146
1147/*
1148 * Add a service into the service hash table
1149 */
1150static int
c860c6b1
JV
1151ip_vs_add_service(struct ip_vs_service_user_kern *u,
1152 struct ip_vs_service **svc_p)
1da177e4
LT
1153{
1154 int ret = 0;
1155 struct ip_vs_scheduler *sched = NULL;
1156 struct ip_vs_service *svc = NULL;
1157
1158 /* increase the module use count */
1159 ip_vs_use_count_inc();
1160
1161 /* Lookup the scheduler by 'u->sched_name' */
1162 sched = ip_vs_scheduler_get(u->sched_name);
1163 if (sched == NULL) {
1164 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1165 u->sched_name);
1166 ret = -ENOENT;
1167 goto out_mod_dec;
1168 }
1169
f94fd041
JV
1170#ifdef CONFIG_IP_VS_IPV6
1171 if (u->af == AF_INET6) {
1172 if (!sched->supports_ipv6) {
1173 ret = -EAFNOSUPPORT;
1174 goto out_err;
1175 }
1176 if ((u->netmask < 1) || (u->netmask > 128)) {
1177 ret = -EINVAL;
1178 goto out_err;
1179 }
1180 }
1181#endif
1182
0da974f4 1183 svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
1da177e4
LT
1184 if (svc == NULL) {
1185 IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
1186 ret = -ENOMEM;
1187 goto out_err;
1188 }
1da177e4
LT
1189
1190 /* I'm the first user of the service */
1191 atomic_set(&svc->usecnt, 1);
1192 atomic_set(&svc->refcnt, 0);
1193
c860c6b1 1194 svc->af = u->af;
1da177e4 1195 svc->protocol = u->protocol;
c860c6b1 1196 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1da177e4
LT
1197 svc->port = u->port;
1198 svc->fwmark = u->fwmark;
1199 svc->flags = u->flags;
1200 svc->timeout = u->timeout * HZ;
1201 svc->netmask = u->netmask;
1202
1203 INIT_LIST_HEAD(&svc->destinations);
1204 rwlock_init(&svc->sched_lock);
1205 spin_lock_init(&svc->stats.lock);
1206
1207 /* Bind the scheduler */
1208 ret = ip_vs_bind_scheduler(svc, sched);
1209 if (ret)
1210 goto out_err;
1211 sched = NULL;
1212
1213 /* Update the virtual service counters */
1214 if (svc->port == FTPPORT)
1215 atomic_inc(&ip_vs_ftpsvc_counter);
1216 else if (svc->port == 0)
1217 atomic_inc(&ip_vs_nullsvc_counter);
1218
1219 ip_vs_new_estimator(&svc->stats);
f94fd041
JV
1220
1221 /* Count only IPv4 services for old get/setsockopt interface */
1222 if (svc->af == AF_INET)
1223 ip_vs_num_services++;
1da177e4
LT
1224
1225 /* Hash the service into the service table */
1226 write_lock_bh(&__ip_vs_svc_lock);
1227 ip_vs_svc_hash(svc);
1228 write_unlock_bh(&__ip_vs_svc_lock);
1229
1230 *svc_p = svc;
1231 return 0;
1232
1233 out_err:
1234 if (svc != NULL) {
1235 if (svc->scheduler)
1236 ip_vs_unbind_scheduler(svc);
1237 if (svc->inc) {
1238 local_bh_disable();
1239 ip_vs_app_inc_put(svc->inc);
1240 local_bh_enable();
1241 }
1242 kfree(svc);
1243 }
1244 ip_vs_scheduler_put(sched);
1245
1246 out_mod_dec:
1247 /* decrease the module use count */
1248 ip_vs_use_count_dec();
1249
1250 return ret;
1251}
1252
1253
1254/*
1255 * Edit a service and bind it with a new scheduler
1256 */
1257static int
c860c6b1 1258ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1da177e4
LT
1259{
1260 struct ip_vs_scheduler *sched, *old_sched;
1261 int ret = 0;
1262
1263 /*
1264 * Lookup the scheduler, by 'u->sched_name'
1265 */
1266 sched = ip_vs_scheduler_get(u->sched_name);
1267 if (sched == NULL) {
1268 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1269 u->sched_name);
1270 return -ENOENT;
1271 }
1272 old_sched = sched;
1273
f94fd041
JV
1274#ifdef CONFIG_IP_VS_IPV6
1275 if (u->af == AF_INET6) {
1276 if (!sched->supports_ipv6) {
a5ba4bf2 1277 ret = -EAFNOSUPPORT;
f94fd041
JV
1278 goto out;
1279 }
1280 if ((u->netmask < 1) || (u->netmask > 128)) {
a5ba4bf2 1281 ret = -EINVAL;
f94fd041
JV
1282 goto out;
1283 }
1284 }
1285#endif
1286
1da177e4
LT
1287 write_lock_bh(&__ip_vs_svc_lock);
1288
1289 /*
1290 * Wait until all other svc users go away.
1291 */
1292 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1293
1294 /*
1295 * Set the flags and timeout value
1296 */
1297 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1298 svc->timeout = u->timeout * HZ;
1299 svc->netmask = u->netmask;
1300
1301 old_sched = svc->scheduler;
1302 if (sched != old_sched) {
1303 /*
1304 * Unbind the old scheduler
1305 */
1306 if ((ret = ip_vs_unbind_scheduler(svc))) {
1307 old_sched = sched;
9e691ed6 1308 goto out_unlock;
1da177e4
LT
1309 }
1310
1311 /*
1312 * Bind the new scheduler
1313 */
1314 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1315 /*
1316 * If ip_vs_bind_scheduler fails, restore the old
1317 * scheduler.
1318 * The main reason of failure is out of memory.
1319 *
1320 * The question is if the old scheduler can be
1321 * restored all the time. TODO: if it cannot be
1322 * restored some time, we must delete the service,
1323 * otherwise the system may crash.
1324 */
1325 ip_vs_bind_scheduler(svc, old_sched);
1326 old_sched = sched;
9e691ed6 1327 goto out_unlock;
1da177e4
LT
1328 }
1329 }
1330
9e691ed6 1331 out_unlock:
1da177e4 1332 write_unlock_bh(&__ip_vs_svc_lock);
9e691ed6 1333 out:
1da177e4
LT
1334
1335 if (old_sched)
1336 ip_vs_scheduler_put(old_sched);
1337
1338 return ret;
1339}
1340
1341
1342/*
1343 * Delete a service from the service list
1344 * - The service must be unlinked, unlocked and not referenced!
1345 * - We are called under _bh lock
1346 */
1347static void __ip_vs_del_service(struct ip_vs_service *svc)
1348{
1349 struct ip_vs_dest *dest, *nxt;
1350 struct ip_vs_scheduler *old_sched;
1351
f94fd041
JV
1352 /* Count only IPv4 services for old get/setsockopt interface */
1353 if (svc->af == AF_INET)
1354 ip_vs_num_services--;
1355
1da177e4
LT
1356 ip_vs_kill_estimator(&svc->stats);
1357
1358 /* Unbind scheduler */
1359 old_sched = svc->scheduler;
1360 ip_vs_unbind_scheduler(svc);
1361 if (old_sched)
1362 ip_vs_scheduler_put(old_sched);
1363
1364 /* Unbind app inc */
1365 if (svc->inc) {
1366 ip_vs_app_inc_put(svc->inc);
1367 svc->inc = NULL;
1368 }
1369
1370 /*
1371 * Unlink the whole destination list
1372 */
1373 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1374 __ip_vs_unlink_dest(svc, dest, 0);
1375 __ip_vs_del_dest(dest);
1376 }
1377
1378 /*
1379 * Update the virtual service counters
1380 */
1381 if (svc->port == FTPPORT)
1382 atomic_dec(&ip_vs_ftpsvc_counter);
1383 else if (svc->port == 0)
1384 atomic_dec(&ip_vs_nullsvc_counter);
1385
1386 /*
1387 * Free the service if nobody refers to it
1388 */
1389 if (atomic_read(&svc->refcnt) == 0)
1390 kfree(svc);
1391
1392 /* decrease the module use count */
1393 ip_vs_use_count_dec();
1394}
1395
1396/*
1397 * Delete a service from the service list
1398 */
1399static int ip_vs_del_service(struct ip_vs_service *svc)
1400{
1401 if (svc == NULL)
1402 return -EEXIST;
1403
1404 /*
1405 * Unhash it from the service table
1406 */
1407 write_lock_bh(&__ip_vs_svc_lock);
1408
1409 ip_vs_svc_unhash(svc);
1410
1411 /*
1412 * Wait until all the svc users go away.
1413 */
1414 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1415
1416 __ip_vs_del_service(svc);
1417
1418 write_unlock_bh(&__ip_vs_svc_lock);
1419
1420 return 0;
1421}
1422
1423
1424/*
1425 * Flush all the virtual services
1426 */
1427static int ip_vs_flush(void)
1428{
1429 int idx;
1430 struct ip_vs_service *svc, *nxt;
1431
1432 /*
1433 * Flush the service table hashed by <protocol,addr,port>
1434 */
1435 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1436 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1437 write_lock_bh(&__ip_vs_svc_lock);
1438 ip_vs_svc_unhash(svc);
1439 /*
1440 * Wait until all the svc users go away.
1441 */
1442 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1443 __ip_vs_del_service(svc);
1444 write_unlock_bh(&__ip_vs_svc_lock);
1445 }
1446 }
1447
1448 /*
1449 * Flush the service table hashed by fwmark
1450 */
1451 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1452 list_for_each_entry_safe(svc, nxt,
1453 &ip_vs_svc_fwm_table[idx], f_list) {
1454 write_lock_bh(&__ip_vs_svc_lock);
1455 ip_vs_svc_unhash(svc);
1456 /*
1457 * Wait until all the svc users go away.
1458 */
1459 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1460 __ip_vs_del_service(svc);
1461 write_unlock_bh(&__ip_vs_svc_lock);
1462 }
1463 }
1464
1465 return 0;
1466}
1467
1468
1469/*
1470 * Zero counters in a service or all services
1471 */
1472static int ip_vs_zero_service(struct ip_vs_service *svc)
1473{
1474 struct ip_vs_dest *dest;
1475
1476 write_lock_bh(&__ip_vs_svc_lock);
1477 list_for_each_entry(dest, &svc->destinations, n_list) {
1478 ip_vs_zero_stats(&dest->stats);
1479 }
1480 ip_vs_zero_stats(&svc->stats);
1481 write_unlock_bh(&__ip_vs_svc_lock);
1482 return 0;
1483}
1484
1485static int ip_vs_zero_all(void)
1486{
1487 int idx;
1488 struct ip_vs_service *svc;
1489
1490 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1491 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1492 ip_vs_zero_service(svc);
1493 }
1494 }
1495
1496 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1497 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1498 ip_vs_zero_service(svc);
1499 }
1500 }
1501
1502 ip_vs_zero_stats(&ip_vs_stats);
1503 return 0;
1504}
1505
1506
1507static int
1508proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
1509 void __user *buffer, size_t *lenp, loff_t *ppos)
1510{
1511 int *valp = table->data;
1512 int val = *valp;
1513 int rc;
1514
1515 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1516 if (write && (*valp != val)) {
1517 if ((*valp < 0) || (*valp > 3)) {
1518 /* Restore the correct value */
1519 *valp = val;
1520 } else {
1da177e4 1521 update_defense_level();
1da177e4
LT
1522 }
1523 }
1524 return rc;
1525}
1526
1527
1528static int
1529proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
1530 void __user *buffer, size_t *lenp, loff_t *ppos)
1531{
1532 int *valp = table->data;
1533 int val[2];
1534 int rc;
1535
1536 /* backup the value first */
1537 memcpy(val, valp, sizeof(val));
1538
1539 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1540 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1541 /* Restore the correct value */
1542 memcpy(valp, val, sizeof(val));
1543 }
1544 return rc;
1545}
1546
1547
1548/*
1549 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1550 */
1551
1552static struct ctl_table vs_vars[] = {
1553 {
1da177e4
LT
1554 .procname = "amemthresh",
1555 .data = &sysctl_ip_vs_amemthresh,
1556 .maxlen = sizeof(int),
1557 .mode = 0644,
1558 .proc_handler = &proc_dointvec,
1559 },
1560#ifdef CONFIG_IP_VS_DEBUG
1561 {
1da177e4
LT
1562 .procname = "debug_level",
1563 .data = &sysctl_ip_vs_debug_level,
1564 .maxlen = sizeof(int),
1565 .mode = 0644,
1566 .proc_handler = &proc_dointvec,
1567 },
1568#endif
1569 {
1da177e4
LT
1570 .procname = "am_droprate",
1571 .data = &sysctl_ip_vs_am_droprate,
1572 .maxlen = sizeof(int),
1573 .mode = 0644,
1574 .proc_handler = &proc_dointvec,
1575 },
1576 {
1da177e4
LT
1577 .procname = "drop_entry",
1578 .data = &sysctl_ip_vs_drop_entry,
1579 .maxlen = sizeof(int),
1580 .mode = 0644,
1581 .proc_handler = &proc_do_defense_mode,
1582 },
1583 {
1da177e4
LT
1584 .procname = "drop_packet",
1585 .data = &sysctl_ip_vs_drop_packet,
1586 .maxlen = sizeof(int),
1587 .mode = 0644,
1588 .proc_handler = &proc_do_defense_mode,
1589 },
1590 {
1da177e4
LT
1591 .procname = "secure_tcp",
1592 .data = &sysctl_ip_vs_secure_tcp,
1593 .maxlen = sizeof(int),
1594 .mode = 0644,
1595 .proc_handler = &proc_do_defense_mode,
1596 },
1597#if 0
1598 {
1da177e4
LT
1599 .procname = "timeout_established",
1600 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1601 .maxlen = sizeof(int),
1602 .mode = 0644,
1603 .proc_handler = &proc_dointvec_jiffies,
1604 },
1605 {
1da177e4
LT
1606 .procname = "timeout_synsent",
1607 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1608 .maxlen = sizeof(int),
1609 .mode = 0644,
1610 .proc_handler = &proc_dointvec_jiffies,
1611 },
1612 {
1da177e4
LT
1613 .procname = "timeout_synrecv",
1614 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1615 .maxlen = sizeof(int),
1616 .mode = 0644,
1617 .proc_handler = &proc_dointvec_jiffies,
1618 },
1619 {
1da177e4
LT
1620 .procname = "timeout_finwait",
1621 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1622 .maxlen = sizeof(int),
1623 .mode = 0644,
1624 .proc_handler = &proc_dointvec_jiffies,
1625 },
1626 {
1da177e4
LT
1627 .procname = "timeout_timewait",
1628 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1629 .maxlen = sizeof(int),
1630 .mode = 0644,
1631 .proc_handler = &proc_dointvec_jiffies,
1632 },
1633 {
1da177e4
LT
1634 .procname = "timeout_close",
1635 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1636 .maxlen = sizeof(int),
1637 .mode = 0644,
1638 .proc_handler = &proc_dointvec_jiffies,
1639 },
1640 {
1da177e4
LT
1641 .procname = "timeout_closewait",
1642 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1643 .maxlen = sizeof(int),
1644 .mode = 0644,
1645 .proc_handler = &proc_dointvec_jiffies,
1646 },
1647 {
1da177e4
LT
1648 .procname = "timeout_lastack",
1649 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1650 .maxlen = sizeof(int),
1651 .mode = 0644,
1652 .proc_handler = &proc_dointvec_jiffies,
1653 },
1654 {
1da177e4
LT
1655 .procname = "timeout_listen",
1656 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1657 .maxlen = sizeof(int),
1658 .mode = 0644,
1659 .proc_handler = &proc_dointvec_jiffies,
1660 },
1661 {
1da177e4
LT
1662 .procname = "timeout_synack",
1663 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1664 .maxlen = sizeof(int),
1665 .mode = 0644,
1666 .proc_handler = &proc_dointvec_jiffies,
1667 },
1668 {
1da177e4
LT
1669 .procname = "timeout_udp",
1670 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1671 .maxlen = sizeof(int),
1672 .mode = 0644,
1673 .proc_handler = &proc_dointvec_jiffies,
1674 },
1675 {
1da177e4
LT
1676 .procname = "timeout_icmp",
1677 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1678 .maxlen = sizeof(int),
1679 .mode = 0644,
1680 .proc_handler = &proc_dointvec_jiffies,
1681 },
1682#endif
1683 {
1da177e4
LT
1684 .procname = "cache_bypass",
1685 .data = &sysctl_ip_vs_cache_bypass,
1686 .maxlen = sizeof(int),
1687 .mode = 0644,
1688 .proc_handler = &proc_dointvec,
1689 },
1690 {
1da177e4
LT
1691 .procname = "expire_nodest_conn",
1692 .data = &sysctl_ip_vs_expire_nodest_conn,
1693 .maxlen = sizeof(int),
1694 .mode = 0644,
1695 .proc_handler = &proc_dointvec,
1696 },
1697 {
1da177e4
LT
1698 .procname = "expire_quiescent_template",
1699 .data = &sysctl_ip_vs_expire_quiescent_template,
1700 .maxlen = sizeof(int),
1701 .mode = 0644,
1702 .proc_handler = &proc_dointvec,
1703 },
1704 {
1da177e4
LT
1705 .procname = "sync_threshold",
1706 .data = &sysctl_ip_vs_sync_threshold,
1707 .maxlen = sizeof(sysctl_ip_vs_sync_threshold),
1708 .mode = 0644,
1709 .proc_handler = &proc_do_sync_threshold,
1710 },
1711 {
1da177e4
LT
1712 .procname = "nat_icmp_send",
1713 .data = &sysctl_ip_vs_nat_icmp_send,
1714 .maxlen = sizeof(int),
1715 .mode = 0644,
1716 .proc_handler = &proc_dointvec,
1717 },
1718 { .ctl_name = 0 }
1719};
1720
5587da55 1721const struct ctl_path net_vs_ctl_path[] = {
90754f8e
PE
1722 { .procname = "net", .ctl_name = CTL_NET, },
1723 { .procname = "ipv4", .ctl_name = NET_IPV4, },
1724 { .procname = "vs", },
1725 { }
1da177e4 1726};
90754f8e 1727EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1da177e4
LT
1728
1729static struct ctl_table_header * sysctl_header;
1730
1731#ifdef CONFIG_PROC_FS
1732
/* Iteration cursor kept in seq->private while walking the service tables. */
struct ip_vs_iter {
	struct list_head *table;	/* hash table currently being walked */
	int bucket;			/* index of the current bucket in it */
};
1737
1738/*
1739 * Write the contents of the VS rule table to a PROCfs file.
1740 * (It is kept just for backward compatibility)
1741 */
1742static inline const char *ip_vs_fwd_name(unsigned flags)
1743{
1744 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1745 case IP_VS_CONN_F_LOCALNODE:
1746 return "Local";
1747 case IP_VS_CONN_F_TUNNEL:
1748 return "Tunnel";
1749 case IP_VS_CONN_F_DROUTE:
1750 return "Route";
1751 default:
1752 return "Masq";
1753 }
1754}
1755
1756
1757/* Get the Nth entry in the two lists */
1758static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1759{
1760 struct ip_vs_iter *iter = seq->private;
1761 int idx;
1762 struct ip_vs_service *svc;
1763
1764 /* look in hash by protocol */
1765 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1766 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1767 if (pos-- == 0){
1768 iter->table = ip_vs_svc_table;
1769 iter->bucket = idx;
1770 return svc;
1771 }
1772 }
1773 }
1774
1775 /* keep looking in fwmark */
1776 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1777 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1778 if (pos-- == 0) {
1779 iter->table = ip_vs_svc_fwm_table;
1780 iter->bucket = idx;
1781 return svc;
1782 }
1783 }
1784 }
1785
1786 return NULL;
1787}
1788
1789static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
563e94f0 1790__acquires(__ip_vs_svc_lock)
1da177e4
LT
1791{
1792
1793 read_lock_bh(&__ip_vs_svc_lock);
1794 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1795}
1796
1797
1798static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1799{
1800 struct list_head *e;
1801 struct ip_vs_iter *iter;
1802 struct ip_vs_service *svc;
1803
1804 ++*pos;
1805 if (v == SEQ_START_TOKEN)
1806 return ip_vs_info_array(seq,0);
1807
1808 svc = v;
1809 iter = seq->private;
1810
1811 if (iter->table == ip_vs_svc_table) {
1812 /* next service in table hashed by protocol */
1813 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1814 return list_entry(e, struct ip_vs_service, s_list);
1815
1816
1817 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1818 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1819 s_list) {
1820 return svc;
1821 }
1822 }
1823
1824 iter->table = ip_vs_svc_fwm_table;
1825 iter->bucket = -1;
1826 goto scan_fwmark;
1827 }
1828
1829 /* next service in hashed by fwmark */
1830 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1831 return list_entry(e, struct ip_vs_service, f_list);
1832
1833 scan_fwmark:
1834 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1835 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1836 f_list)
1837 return svc;
1838 }
1839
1840 return NULL;
1841}
1842
1843static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
563e94f0 1844__releases(__ip_vs_svc_lock)
1da177e4
LT
1845{
1846 read_unlock_bh(&__ip_vs_svc_lock);
1847}
1848
1849
1850static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1851{
1852 if (v == SEQ_START_TOKEN) {
1853 seq_printf(seq,
1854 "IP Virtual Server version %d.%d.%d (size=%d)\n",
1855 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
1856 seq_puts(seq,
1857 "Prot LocalAddress:Port Scheduler Flags\n");
1858 seq_puts(seq,
1859 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1860 } else {
1861 const struct ip_vs_service *svc = v;
1862 const struct ip_vs_iter *iter = seq->private;
1863 const struct ip_vs_dest *dest;
1864
667a5f18
VB
1865 if (iter->table == ip_vs_svc_table) {
1866#ifdef CONFIG_IP_VS_IPV6
1867 if (svc->af == AF_INET6)
1868 seq_printf(seq, "%s [" NIP6_FMT "]:%04X %s ",
1869 ip_vs_proto_name(svc->protocol),
1870 NIP6(svc->addr.in6),
1871 ntohs(svc->port),
1872 svc->scheduler->name);
1873 else
1874#endif
1875 seq_printf(seq, "%s %08X:%04X %s ",
1876 ip_vs_proto_name(svc->protocol),
1877 ntohl(svc->addr.ip),
1878 ntohs(svc->port),
1879 svc->scheduler->name);
1880 } else {
1da177e4
LT
1881 seq_printf(seq, "FWM %08X %s ",
1882 svc->fwmark, svc->scheduler->name);
667a5f18 1883 }
1da177e4
LT
1884
1885 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1886 seq_printf(seq, "persistent %d %08X\n",
1887 svc->timeout,
1888 ntohl(svc->netmask));
1889 else
1890 seq_putc(seq, '\n');
1891
1892 list_for_each_entry(dest, &svc->destinations, n_list) {
667a5f18
VB
1893#ifdef CONFIG_IP_VS_IPV6
1894 if (dest->af == AF_INET6)
1895 seq_printf(seq,
1896 " -> [" NIP6_FMT "]:%04X"
1897 " %-7s %-6d %-10d %-10d\n",
1898 NIP6(dest->addr.in6),
1899 ntohs(dest->port),
1900 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1901 atomic_read(&dest->weight),
1902 atomic_read(&dest->activeconns),
1903 atomic_read(&dest->inactconns));
1904 else
1905#endif
1906 seq_printf(seq,
1907 " -> %08X:%04X "
1908 "%-7s %-6d %-10d %-10d\n",
1909 ntohl(dest->addr.ip),
1910 ntohs(dest->port),
1911 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1912 atomic_read(&dest->weight),
1913 atomic_read(&dest->activeconns),
1914 atomic_read(&dest->inactconns));
1915
1da177e4
LT
1916 }
1917 }
1918 return 0;
1919}
1920
56b3d975 1921static const struct seq_operations ip_vs_info_seq_ops = {
1da177e4
LT
1922 .start = ip_vs_info_seq_start,
1923 .next = ip_vs_info_seq_next,
1924 .stop = ip_vs_info_seq_stop,
1925 .show = ip_vs_info_seq_show,
1926};
1927
1928static int ip_vs_info_open(struct inode *inode, struct file *file)
1929{
cf7732e4
PE
1930 return seq_open_private(file, &ip_vs_info_seq_ops,
1931 sizeof(struct ip_vs_iter));
1da177e4
LT
1932}
1933
9a32144e 1934static const struct file_operations ip_vs_info_fops = {
1da177e4
LT
1935 .owner = THIS_MODULE,
1936 .open = ip_vs_info_open,
1937 .read = seq_read,
1938 .llseek = seq_lseek,
1939 .release = seq_release_private,
1940};
1941
1942#endif
1943
519e49e8
SW
1944struct ip_vs_stats ip_vs_stats = {
1945 .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
1946};
1da177e4
LT
1947
1948#ifdef CONFIG_PROC_FS
1949static int ip_vs_stats_show(struct seq_file *seq, void *v)
1950{
1951
1952/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1953 seq_puts(seq,
1954 " Total Incoming Outgoing Incoming Outgoing\n");
1955 seq_printf(seq,
1956 " Conns Packets Packets Bytes Bytes\n");
1957
1958 spin_lock_bh(&ip_vs_stats.lock);
e9c0ce23
SW
1959 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,
1960 ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,
1961 (unsigned long long) ip_vs_stats.ustats.inbytes,
1962 (unsigned long long) ip_vs_stats.ustats.outbytes);
1da177e4
LT
1963
1964/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1965 seq_puts(seq,
1966 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1967 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
e9c0ce23
SW
1968 ip_vs_stats.ustats.cps,
1969 ip_vs_stats.ustats.inpps,
1970 ip_vs_stats.ustats.outpps,
1971 ip_vs_stats.ustats.inbps,
1972 ip_vs_stats.ustats.outbps);
1da177e4
LT
1973 spin_unlock_bh(&ip_vs_stats.lock);
1974
1975 return 0;
1976}
1977
1978static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1979{
1980 return single_open(file, ip_vs_stats_show, NULL);
1981}
1982
9a32144e 1983static const struct file_operations ip_vs_stats_fops = {
1da177e4
LT
1984 .owner = THIS_MODULE,
1985 .open = ip_vs_stats_seq_open,
1986 .read = seq_read,
1987 .llseek = seq_lseek,
1988 .release = single_release,
1989};
1990
1991#endif
1992
1993/*
1994 * Set timeout values for tcp tcpfin udp in the timeout_table.
1995 */
1996static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
1997{
1998 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
1999 u->tcp_timeout,
2000 u->tcp_fin_timeout,
2001 u->udp_timeout);
2002
2003#ifdef CONFIG_IP_VS_PROTO_TCP
2004 if (u->tcp_timeout) {
2005 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
2006 = u->tcp_timeout * HZ;
2007 }
2008
2009 if (u->tcp_fin_timeout) {
2010 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
2011 = u->tcp_fin_timeout * HZ;
2012 }
2013#endif
2014
2015#ifdef CONFIG_IP_VS_PROTO_UDP
2016 if (u->udp_timeout) {
2017 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
2018 = u->udp_timeout * HZ;
2019 }
2020#endif
2021 return 0;
2022}
2023
2024
2025#define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2026#define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user))
2027#define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \
2028 sizeof(struct ip_vs_dest_user))
2029#define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2030#define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user))
2031#define MAX_ARG_LEN SVCDEST_ARG_LEN
2032
9b5b5cff 2033static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
1da177e4
LT
2034 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
2035 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
2036 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
2037 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
2038 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
2039 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
2040 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
2041 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
2042 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
2043 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
2044 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
2045};
2046
c860c6b1
JV
2047static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2048 struct ip_vs_service_user *usvc_compat)
2049{
2050 usvc->af = AF_INET;
2051 usvc->protocol = usvc_compat->protocol;
2052 usvc->addr.ip = usvc_compat->addr;
2053 usvc->port = usvc_compat->port;
2054 usvc->fwmark = usvc_compat->fwmark;
2055
2056 /* Deep copy of sched_name is not needed here */
2057 usvc->sched_name = usvc_compat->sched_name;
2058
2059 usvc->flags = usvc_compat->flags;
2060 usvc->timeout = usvc_compat->timeout;
2061 usvc->netmask = usvc_compat->netmask;
2062}
2063
2064static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2065 struct ip_vs_dest_user *udest_compat)
2066{
2067 udest->addr.ip = udest_compat->addr;
2068 udest->port = udest_compat->port;
2069 udest->conn_flags = udest_compat->conn_flags;
2070 udest->weight = udest_compat->weight;
2071 udest->u_threshold = udest_compat->u_threshold;
2072 udest->l_threshold = udest_compat->l_threshold;
2073}
2074
1da177e4
LT
2075static int
2076do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2077{
2078 int ret;
2079 unsigned char arg[MAX_ARG_LEN];
c860c6b1
JV
2080 struct ip_vs_service_user *usvc_compat;
2081 struct ip_vs_service_user_kern usvc;
1da177e4 2082 struct ip_vs_service *svc;
c860c6b1
JV
2083 struct ip_vs_dest_user *udest_compat;
2084 struct ip_vs_dest_user_kern udest;
1da177e4
LT
2085
2086 if (!capable(CAP_NET_ADMIN))
2087 return -EPERM;
2088
2089 if (len != set_arglen[SET_CMDID(cmd)]) {
2090 IP_VS_ERR("set_ctl: len %u != %u\n",
2091 len, set_arglen[SET_CMDID(cmd)]);
2092 return -EINVAL;
2093 }
2094
2095 if (copy_from_user(arg, user, len) != 0)
2096 return -EFAULT;
2097
2098 /* increase the module use count */
2099 ip_vs_use_count_inc();
2100
14cc3e2b 2101 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
1da177e4
LT
2102 ret = -ERESTARTSYS;
2103 goto out_dec;
2104 }
2105
2106 if (cmd == IP_VS_SO_SET_FLUSH) {
2107 /* Flush the virtual service */
2108 ret = ip_vs_flush();
2109 goto out_unlock;
2110 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2111 /* Set timeout values for (tcp tcpfin udp) */
2112 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
2113 goto out_unlock;
2114 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2115 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2116 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
2117 goto out_unlock;
2118 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2119 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2120 ret = stop_sync_thread(dm->state);
2121 goto out_unlock;
2122 }
2123
c860c6b1
JV
2124 usvc_compat = (struct ip_vs_service_user *)arg;
2125 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2126
2127 /* We only use the new structs internally, so copy userspace compat
2128 * structs to extended internal versions */
2129 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2130 ip_vs_copy_udest_compat(&udest, udest_compat);
1da177e4
LT
2131
2132 if (cmd == IP_VS_SO_SET_ZERO) {
2133 /* if no service address is set, zero counters in all */
c860c6b1 2134 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
1da177e4
LT
2135 ret = ip_vs_zero_all();
2136 goto out_unlock;
2137 }
2138 }
2139
2140 /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
c860c6b1 2141 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) {
1da177e4 2142 IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
c860c6b1
JV
2143 usvc.protocol, NIPQUAD(usvc.addr.ip),
2144 ntohs(usvc.port), usvc.sched_name);
1da177e4
LT
2145 ret = -EFAULT;
2146 goto out_unlock;
2147 }
2148
2149 /* Lookup the exact service by <protocol, addr, port> or fwmark */
c860c6b1 2150 if (usvc.fwmark == 0)
b18610de
JV
2151 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
2152 &usvc.addr, usvc.port);
1da177e4 2153 else
b18610de 2154 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
1da177e4
LT
2155
2156 if (cmd != IP_VS_SO_SET_ADD
c860c6b1 2157 && (svc == NULL || svc->protocol != usvc.protocol)) {
1da177e4
LT
2158 ret = -ESRCH;
2159 goto out_unlock;
2160 }
2161
2162 switch (cmd) {
2163 case IP_VS_SO_SET_ADD:
2164 if (svc != NULL)
2165 ret = -EEXIST;
2166 else
c860c6b1 2167 ret = ip_vs_add_service(&usvc, &svc);
1da177e4
LT
2168 break;
2169 case IP_VS_SO_SET_EDIT:
c860c6b1 2170 ret = ip_vs_edit_service(svc, &usvc);
1da177e4
LT
2171 break;
2172 case IP_VS_SO_SET_DEL:
2173 ret = ip_vs_del_service(svc);
2174 if (!ret)
2175 goto out_unlock;
2176 break;
2177 case IP_VS_SO_SET_ZERO:
2178 ret = ip_vs_zero_service(svc);
2179 break;
2180 case IP_VS_SO_SET_ADDDEST:
c860c6b1 2181 ret = ip_vs_add_dest(svc, &udest);
1da177e4
LT
2182 break;
2183 case IP_VS_SO_SET_EDITDEST:
c860c6b1 2184 ret = ip_vs_edit_dest(svc, &udest);
1da177e4
LT
2185 break;
2186 case IP_VS_SO_SET_DELDEST:
c860c6b1 2187 ret = ip_vs_del_dest(svc, &udest);
1da177e4
LT
2188 break;
2189 default:
2190 ret = -EINVAL;
2191 }
2192
2193 if (svc)
2194 ip_vs_service_put(svc);
2195
2196 out_unlock:
14cc3e2b 2197 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2198 out_dec:
2199 /* decrease the module use count */
2200 ip_vs_use_count_dec();
2201
2202 return ret;
2203}
2204
2205
2206static void
2207ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2208{
2209 spin_lock_bh(&src->lock);
e9c0ce23 2210 memcpy(dst, &src->ustats, sizeof(*dst));
1da177e4
LT
2211 spin_unlock_bh(&src->lock);
2212}
2213
2214static void
2215ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2216{
2217 dst->protocol = src->protocol;
e7ade46a 2218 dst->addr = src->addr.ip;
1da177e4
LT
2219 dst->port = src->port;
2220 dst->fwmark = src->fwmark;
4da62fc7 2221 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
1da177e4
LT
2222 dst->flags = src->flags;
2223 dst->timeout = src->timeout / HZ;
2224 dst->netmask = src->netmask;
2225 dst->num_dests = src->num_dests;
2226 ip_vs_copy_stats(&dst->stats, &src->stats);
2227}
2228
2229static inline int
2230__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2231 struct ip_vs_get_services __user *uptr)
2232{
2233 int idx, count=0;
2234 struct ip_vs_service *svc;
2235 struct ip_vs_service_entry entry;
2236 int ret = 0;
2237
2238 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2239 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
f94fd041
JV
2240 /* Only expose IPv4 entries to old interface */
2241 if (svc->af != AF_INET)
2242 continue;
2243
1da177e4
LT
2244 if (count >= get->num_services)
2245 goto out;
4da62fc7 2246 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2247 ip_vs_copy_service(&entry, svc);
2248 if (copy_to_user(&uptr->entrytable[count],
2249 &entry, sizeof(entry))) {
2250 ret = -EFAULT;
2251 goto out;
2252 }
2253 count++;
2254 }
2255 }
2256
2257 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2258 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
f94fd041
JV
2259 /* Only expose IPv4 entries to old interface */
2260 if (svc->af != AF_INET)
2261 continue;
2262
1da177e4
LT
2263 if (count >= get->num_services)
2264 goto out;
4da62fc7 2265 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2266 ip_vs_copy_service(&entry, svc);
2267 if (copy_to_user(&uptr->entrytable[count],
2268 &entry, sizeof(entry))) {
2269 ret = -EFAULT;
2270 goto out;
2271 }
2272 count++;
2273 }
2274 }
2275 out:
2276 return ret;
2277}
2278
2279static inline int
2280__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2281 struct ip_vs_get_dests __user *uptr)
2282{
2283 struct ip_vs_service *svc;
b18610de 2284 union nf_inet_addr addr = { .ip = get->addr };
1da177e4
LT
2285 int ret = 0;
2286
2287 if (get->fwmark)
b18610de 2288 svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark);
1da177e4 2289 else
b18610de
JV
2290 svc = __ip_vs_service_get(AF_INET, get->protocol, &addr,
2291 get->port);
2292
1da177e4
LT
2293 if (svc) {
2294 int count = 0;
2295 struct ip_vs_dest *dest;
2296 struct ip_vs_dest_entry entry;
2297
2298 list_for_each_entry(dest, &svc->destinations, n_list) {
2299 if (count >= get->num_dests)
2300 break;
2301
e7ade46a 2302 entry.addr = dest->addr.ip;
1da177e4
LT
2303 entry.port = dest->port;
2304 entry.conn_flags = atomic_read(&dest->conn_flags);
2305 entry.weight = atomic_read(&dest->weight);
2306 entry.u_threshold = dest->u_threshold;
2307 entry.l_threshold = dest->l_threshold;
2308 entry.activeconns = atomic_read(&dest->activeconns);
2309 entry.inactconns = atomic_read(&dest->inactconns);
2310 entry.persistconns = atomic_read(&dest->persistconns);
2311 ip_vs_copy_stats(&entry.stats, &dest->stats);
2312 if (copy_to_user(&uptr->entrytable[count],
2313 &entry, sizeof(entry))) {
2314 ret = -EFAULT;
2315 break;
2316 }
2317 count++;
2318 }
2319 ip_vs_service_put(svc);
2320 } else
2321 ret = -ESRCH;
2322 return ret;
2323}
2324
2325static inline void
2326__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2327{
2328#ifdef CONFIG_IP_VS_PROTO_TCP
2329 u->tcp_timeout =
2330 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2331 u->tcp_fin_timeout =
2332 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2333#endif
2334#ifdef CONFIG_IP_VS_PROTO_UDP
2335 u->udp_timeout =
2336 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2337#endif
2338}
2339
2340
2341#define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2342#define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2343#define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2344#define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2345#define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2346#define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2347#define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2348
9b5b5cff 2349static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
1da177e4
LT
2350 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2351 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2352 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2353 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2354 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2355 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2356 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2357};
2358
2359static int
2360do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2361{
2362 unsigned char arg[128];
2363 int ret = 0;
2364
2365 if (!capable(CAP_NET_ADMIN))
2366 return -EPERM;
2367
2368 if (*len < get_arglen[GET_CMDID(cmd)]) {
2369 IP_VS_ERR("get_ctl: len %u < %u\n",
2370 *len, get_arglen[GET_CMDID(cmd)]);
2371 return -EINVAL;
2372 }
2373
2374 if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0)
2375 return -EFAULT;
2376
14cc3e2b 2377 if (mutex_lock_interruptible(&__ip_vs_mutex))
1da177e4
LT
2378 return -ERESTARTSYS;
2379
2380 switch (cmd) {
2381 case IP_VS_SO_GET_VERSION:
2382 {
2383 char buf[64];
2384
2385 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2386 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
2387 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2388 ret = -EFAULT;
2389 goto out;
2390 }
2391 *len = strlen(buf)+1;
2392 }
2393 break;
2394
2395 case IP_VS_SO_GET_INFO:
2396 {
2397 struct ip_vs_getinfo info;
2398 info.version = IP_VS_VERSION_CODE;
2399 info.size = IP_VS_CONN_TAB_SIZE;
2400 info.num_services = ip_vs_num_services;
2401 if (copy_to_user(user, &info, sizeof(info)) != 0)
2402 ret = -EFAULT;
2403 }
2404 break;
2405
2406 case IP_VS_SO_GET_SERVICES:
2407 {
2408 struct ip_vs_get_services *get;
2409 int size;
2410
2411 get = (struct ip_vs_get_services *)arg;
2412 size = sizeof(*get) +
2413 sizeof(struct ip_vs_service_entry) * get->num_services;
2414 if (*len != size) {
2415 IP_VS_ERR("length: %u != %u\n", *len, size);
2416 ret = -EINVAL;
2417 goto out;
2418 }
2419 ret = __ip_vs_get_service_entries(get, user);
2420 }
2421 break;
2422
2423 case IP_VS_SO_GET_SERVICE:
2424 {
2425 struct ip_vs_service_entry *entry;
2426 struct ip_vs_service *svc;
b18610de 2427 union nf_inet_addr addr;
1da177e4
LT
2428
2429 entry = (struct ip_vs_service_entry *)arg;
b18610de 2430 addr.ip = entry->addr;
1da177e4 2431 if (entry->fwmark)
b18610de 2432 svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark);
1da177e4 2433 else
b18610de
JV
2434 svc = __ip_vs_service_get(AF_INET, entry->protocol,
2435 &addr, entry->port);
1da177e4
LT
2436 if (svc) {
2437 ip_vs_copy_service(entry, svc);
2438 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2439 ret = -EFAULT;
2440 ip_vs_service_put(svc);
2441 } else
2442 ret = -ESRCH;
2443 }
2444 break;
2445
2446 case IP_VS_SO_GET_DESTS:
2447 {
2448 struct ip_vs_get_dests *get;
2449 int size;
2450
2451 get = (struct ip_vs_get_dests *)arg;
2452 size = sizeof(*get) +
2453 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2454 if (*len != size) {
2455 IP_VS_ERR("length: %u != %u\n", *len, size);
2456 ret = -EINVAL;
2457 goto out;
2458 }
2459 ret = __ip_vs_get_dest_entries(get, user);
2460 }
2461 break;
2462
2463 case IP_VS_SO_GET_TIMEOUT:
2464 {
2465 struct ip_vs_timeout_user t;
2466
2467 __ip_vs_get_timeouts(&t);
2468 if (copy_to_user(user, &t, sizeof(t)) != 0)
2469 ret = -EFAULT;
2470 }
2471 break;
2472
2473 case IP_VS_SO_GET_DAEMON:
2474 {
2475 struct ip_vs_daemon_user d[2];
2476
2477 memset(&d, 0, sizeof(d));
2478 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2479 d[0].state = IP_VS_STATE_MASTER;
4da62fc7 2480 strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
1da177e4
LT
2481 d[0].syncid = ip_vs_master_syncid;
2482 }
2483 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2484 d[1].state = IP_VS_STATE_BACKUP;
4da62fc7 2485 strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
1da177e4
LT
2486 d[1].syncid = ip_vs_backup_syncid;
2487 }
2488 if (copy_to_user(user, &d, sizeof(d)) != 0)
2489 ret = -EFAULT;
2490 }
2491 break;
2492
2493 default:
2494 ret = -EINVAL;
2495 }
2496
2497 out:
14cc3e2b 2498 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2499 return ret;
2500}
2501
2502
2503static struct nf_sockopt_ops ip_vs_sockopts = {
2504 .pf = PF_INET,
2505 .set_optmin = IP_VS_BASE_CTL,
2506 .set_optmax = IP_VS_SO_SET_MAX+1,
2507 .set = do_ip_vs_set_ctl,
2508 .get_optmin = IP_VS_BASE_CTL,
2509 .get_optmax = IP_VS_SO_GET_MAX+1,
2510 .get = do_ip_vs_get_ctl,
16fcec35 2511 .owner = THIS_MODULE,
1da177e4
LT
2512};
2513
9a812198
JV
2514/*
2515 * Generic Netlink interface
2516 */
2517
2518/* IPVS genetlink family */
2519static struct genl_family ip_vs_genl_family = {
2520 .id = GENL_ID_GENERATE,
2521 .hdrsize = 0,
2522 .name = IPVS_GENL_NAME,
2523 .version = IPVS_GENL_VERSION,
2524 .maxattr = IPVS_CMD_MAX,
2525};
2526
2527/* Policy used for first-level command attributes */
2528static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2529 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2530 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2531 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2532 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2533 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2534 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2535};
2536
2537/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2538static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2539 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2540 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2541 .len = IP_VS_IFNAME_MAXLEN },
2542 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2543};
2544
2545/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2546static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2547 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2548 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2549 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2550 .len = sizeof(union nf_inet_addr) },
2551 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2552 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2553 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2554 .len = IP_VS_SCHEDNAME_MAXLEN },
2555 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2556 .len = sizeof(struct ip_vs_flags) },
2557 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2558 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2559 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2560};
2561
2562/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2563static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2564 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2565 .len = sizeof(union nf_inet_addr) },
2566 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2567 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2568 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2569 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2570 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2571 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2572 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2573 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2574 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
2575};
2576
2577static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2578 struct ip_vs_stats *stats)
2579{
2580 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2581 if (!nl_stats)
2582 return -EMSGSIZE;
2583
2584 spin_lock_bh(&stats->lock);
2585
e9c0ce23
SW
2586 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
2587 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
2588 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
2589 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
2590 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
2591 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
2592 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
2593 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
2594 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
2595 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
9a812198
JV
2596
2597 spin_unlock_bh(&stats->lock);
2598
2599 nla_nest_end(skb, nl_stats);
2600
2601 return 0;
2602
2603nla_put_failure:
2604 spin_unlock_bh(&stats->lock);
2605 nla_nest_cancel(skb, nl_stats);
2606 return -EMSGSIZE;
2607}
2608
2609static int ip_vs_genl_fill_service(struct sk_buff *skb,
2610 struct ip_vs_service *svc)
2611{
2612 struct nlattr *nl_service;
2613 struct ip_vs_flags flags = { .flags = svc->flags,
2614 .mask = ~0 };
2615
2616 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2617 if (!nl_service)
2618 return -EMSGSIZE;
2619
f94fd041 2620 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
9a812198
JV
2621
2622 if (svc->fwmark) {
2623 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2624 } else {
2625 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2626 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2627 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2628 }
2629
2630 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2631 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2632 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2633 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2634
2635 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2636 goto nla_put_failure;
2637
2638 nla_nest_end(skb, nl_service);
2639
2640 return 0;
2641
2642nla_put_failure:
2643 nla_nest_cancel(skb, nl_service);
2644 return -EMSGSIZE;
2645}
2646
2647static int ip_vs_genl_dump_service(struct sk_buff *skb,
2648 struct ip_vs_service *svc,
2649 struct netlink_callback *cb)
2650{
2651 void *hdr;
2652
2653 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2654 &ip_vs_genl_family, NLM_F_MULTI,
2655 IPVS_CMD_NEW_SERVICE);
2656 if (!hdr)
2657 return -EMSGSIZE;
2658
2659 if (ip_vs_genl_fill_service(skb, svc) < 0)
2660 goto nla_put_failure;
2661
2662 return genlmsg_end(skb, hdr);
2663
2664nla_put_failure:
2665 genlmsg_cancel(skb, hdr);
2666 return -EMSGSIZE;
2667}
2668
2669static int ip_vs_genl_dump_services(struct sk_buff *skb,
2670 struct netlink_callback *cb)
2671{
2672 int idx = 0, i;
2673 int start = cb->args[0];
2674 struct ip_vs_service *svc;
2675
2676 mutex_lock(&__ip_vs_mutex);
2677 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2678 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2679 if (++idx <= start)
2680 continue;
2681 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2682 idx--;
2683 goto nla_put_failure;
2684 }
2685 }
2686 }
2687
2688 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2689 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2690 if (++idx <= start)
2691 continue;
2692 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2693 idx--;
2694 goto nla_put_failure;
2695 }
2696 }
2697 }
2698
2699nla_put_failure:
2700 mutex_unlock(&__ip_vs_mutex);
2701 cb->args[0] = idx;
2702
2703 return skb->len;
2704}
2705
c860c6b1 2706static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
9a812198
JV
2707 struct nlattr *nla, int full_entry)
2708{
2709 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2710 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2711
2712 /* Parse mandatory identifying service fields first */
2713 if (nla == NULL ||
2714 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2715 return -EINVAL;
2716
2717 nla_af = attrs[IPVS_SVC_ATTR_AF];
2718 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2719 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2720 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2721 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2722
2723 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2724 return -EINVAL;
2725
c860c6b1 2726 usvc->af = nla_get_u16(nla_af);
f94fd041
JV
2727#ifdef CONFIG_IP_VS_IPV6
2728 if (usvc->af != AF_INET && usvc->af != AF_INET6)
2729#else
2730 if (usvc->af != AF_INET)
2731#endif
9a812198
JV
2732 return -EAFNOSUPPORT;
2733
2734 if (nla_fwmark) {
2735 usvc->protocol = IPPROTO_TCP;
2736 usvc->fwmark = nla_get_u32(nla_fwmark);
2737 } else {
2738 usvc->protocol = nla_get_u16(nla_protocol);
2739 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2740 usvc->port = nla_get_u16(nla_port);
2741 usvc->fwmark = 0;
2742 }
2743
2744 /* If a full entry was requested, check for the additional fields */
2745 if (full_entry) {
2746 struct nlattr *nla_sched, *nla_flags, *nla_timeout,
2747 *nla_netmask;
2748 struct ip_vs_flags flags;
2749 struct ip_vs_service *svc;
2750
2751 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2752 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2753 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2754 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2755
2756 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2757 return -EINVAL;
2758
2759 nla_memcpy(&flags, nla_flags, sizeof(flags));
2760
2761 /* prefill flags from service if it already exists */
2762 if (usvc->fwmark)
b18610de 2763 svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark);
9a812198 2764 else
b18610de
JV
2765 svc = __ip_vs_service_get(usvc->af, usvc->protocol,
2766 &usvc->addr, usvc->port);
9a812198
JV
2767 if (svc) {
2768 usvc->flags = svc->flags;
2769 ip_vs_service_put(svc);
2770 } else
2771 usvc->flags = 0;
2772
2773 /* set new flags from userland */
2774 usvc->flags = (usvc->flags & ~flags.mask) |
2775 (flags.flags & flags.mask);
c860c6b1 2776 usvc->sched_name = nla_data(nla_sched);
9a812198
JV
2777 usvc->timeout = nla_get_u32(nla_timeout);
2778 usvc->netmask = nla_get_u32(nla_netmask);
2779 }
2780
2781 return 0;
2782}
2783
2784static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2785{
c860c6b1 2786 struct ip_vs_service_user_kern usvc;
9a812198
JV
2787 int ret;
2788
2789 ret = ip_vs_genl_parse_service(&usvc, nla, 0);
2790 if (ret)
2791 return ERR_PTR(ret);
2792
2793 if (usvc.fwmark)
b18610de 2794 return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
9a812198 2795 else
b18610de
JV
2796 return __ip_vs_service_get(usvc.af, usvc.protocol,
2797 &usvc.addr, usvc.port);
9a812198
JV
2798}
2799
2800static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2801{
2802 struct nlattr *nl_dest;
2803
2804 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2805 if (!nl_dest)
2806 return -EMSGSIZE;
2807
2808 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2809 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2810
2811 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2812 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2813 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2814 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2815 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2816 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2817 atomic_read(&dest->activeconns));
2818 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2819 atomic_read(&dest->inactconns));
2820 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2821 atomic_read(&dest->persistconns));
2822
2823 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2824 goto nla_put_failure;
2825
2826 nla_nest_end(skb, nl_dest);
2827
2828 return 0;
2829
2830nla_put_failure:
2831 nla_nest_cancel(skb, nl_dest);
2832 return -EMSGSIZE;
2833}
2834
2835static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2836 struct netlink_callback *cb)
2837{
2838 void *hdr;
2839
2840 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2841 &ip_vs_genl_family, NLM_F_MULTI,
2842 IPVS_CMD_NEW_DEST);
2843 if (!hdr)
2844 return -EMSGSIZE;
2845
2846 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2847 goto nla_put_failure;
2848
2849 return genlmsg_end(skb, hdr);
2850
2851nla_put_failure:
2852 genlmsg_cancel(skb, hdr);
2853 return -EMSGSIZE;
2854}
2855
2856static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2857 struct netlink_callback *cb)
2858{
2859 int idx = 0;
2860 int start = cb->args[0];
2861 struct ip_vs_service *svc;
2862 struct ip_vs_dest *dest;
2863 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2864
2865 mutex_lock(&__ip_vs_mutex);
2866
2867 /* Try to find the service for which to dump destinations */
2868 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2869 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2870 goto out_err;
2871
2872 svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
2873 if (IS_ERR(svc) || svc == NULL)
2874 goto out_err;
2875
2876 /* Dump the destinations */
2877 list_for_each_entry(dest, &svc->destinations, n_list) {
2878 if (++idx <= start)
2879 continue;
2880 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2881 idx--;
2882 goto nla_put_failure;
2883 }
2884 }
2885
2886nla_put_failure:
2887 cb->args[0] = idx;
2888 ip_vs_service_put(svc);
2889
2890out_err:
2891 mutex_unlock(&__ip_vs_mutex);
2892
2893 return skb->len;
2894}
2895
c860c6b1 2896static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
9a812198
JV
2897 struct nlattr *nla, int full_entry)
2898{
2899 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2900 struct nlattr *nla_addr, *nla_port;
2901
2902 /* Parse mandatory identifying destination fields first */
2903 if (nla == NULL ||
2904 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2905 return -EINVAL;
2906
2907 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
2908 nla_port = attrs[IPVS_DEST_ATTR_PORT];
2909
2910 if (!(nla_addr && nla_port))
2911 return -EINVAL;
2912
2913 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2914 udest->port = nla_get_u16(nla_port);
2915
2916 /* If a full entry was requested, check for the additional fields */
2917 if (full_entry) {
2918 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2919 *nla_l_thresh;
2920
2921 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
2922 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
2923 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
2924 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
2925
2926 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
2927 return -EINVAL;
2928
2929 udest->conn_flags = nla_get_u32(nla_fwd)
2930 & IP_VS_CONN_F_FWD_MASK;
2931 udest->weight = nla_get_u32(nla_weight);
2932 udest->u_threshold = nla_get_u32(nla_u_thresh);
2933 udest->l_threshold = nla_get_u32(nla_l_thresh);
2934 }
2935
2936 return 0;
2937}
2938
2939static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
2940 const char *mcast_ifn, __be32 syncid)
2941{
2942 struct nlattr *nl_daemon;
2943
2944 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
2945 if (!nl_daemon)
2946 return -EMSGSIZE;
2947
2948 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
2949 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
2950 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
2951
2952 nla_nest_end(skb, nl_daemon);
2953
2954 return 0;
2955
2956nla_put_failure:
2957 nla_nest_cancel(skb, nl_daemon);
2958 return -EMSGSIZE;
2959}
2960
2961static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
2962 const char *mcast_ifn, __be32 syncid,
2963 struct netlink_callback *cb)
2964{
2965 void *hdr;
2966 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2967 &ip_vs_genl_family, NLM_F_MULTI,
2968 IPVS_CMD_NEW_DAEMON);
2969 if (!hdr)
2970 return -EMSGSIZE;
2971
2972 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
2973 goto nla_put_failure;
2974
2975 return genlmsg_end(skb, hdr);
2976
2977nla_put_failure:
2978 genlmsg_cancel(skb, hdr);
2979 return -EMSGSIZE;
2980}
2981
2982static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
2983 struct netlink_callback *cb)
2984{
2985 mutex_lock(&__ip_vs_mutex);
2986 if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
2987 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
2988 ip_vs_master_mcast_ifn,
2989 ip_vs_master_syncid, cb) < 0)
2990 goto nla_put_failure;
2991
2992 cb->args[0] = 1;
2993 }
2994
2995 if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
2996 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
2997 ip_vs_backup_mcast_ifn,
2998 ip_vs_backup_syncid, cb) < 0)
2999 goto nla_put_failure;
3000
3001 cb->args[1] = 1;
3002 }
3003
3004nla_put_failure:
3005 mutex_unlock(&__ip_vs_mutex);
3006
3007 return skb->len;
3008}
3009
3010static int ip_vs_genl_new_daemon(struct nlattr **attrs)
3011{
3012 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3013 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3014 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3015 return -EINVAL;
3016
3017 return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3018 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3019 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3020}
3021
3022static int ip_vs_genl_del_daemon(struct nlattr **attrs)
3023{
3024 if (!attrs[IPVS_DAEMON_ATTR_STATE])
3025 return -EINVAL;
3026
3027 return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3028}
3029
3030static int ip_vs_genl_set_config(struct nlattr **attrs)
3031{
3032 struct ip_vs_timeout_user t;
3033
3034 __ip_vs_get_timeouts(&t);
3035
3036 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3037 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3038
3039 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3040 t.tcp_fin_timeout =
3041 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3042
3043 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3044 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3045
3046 return ip_vs_set_timeout(&t);
3047}
3048
3049static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3050{
3051 struct ip_vs_service *svc = NULL;
c860c6b1
JV
3052 struct ip_vs_service_user_kern usvc;
3053 struct ip_vs_dest_user_kern udest;
9a812198
JV
3054 int ret = 0, cmd;
3055 int need_full_svc = 0, need_full_dest = 0;
3056
3057 cmd = info->genlhdr->cmd;
3058
3059 mutex_lock(&__ip_vs_mutex);
3060
3061 if (cmd == IPVS_CMD_FLUSH) {
3062 ret = ip_vs_flush();
3063 goto out;
3064 } else if (cmd == IPVS_CMD_SET_CONFIG) {
3065 ret = ip_vs_genl_set_config(info->attrs);
3066 goto out;
3067 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3068 cmd == IPVS_CMD_DEL_DAEMON) {
3069
3070 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3071
3072 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3073 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3074 info->attrs[IPVS_CMD_ATTR_DAEMON],
3075 ip_vs_daemon_policy)) {
3076 ret = -EINVAL;
3077 goto out;
3078 }
3079
3080 if (cmd == IPVS_CMD_NEW_DAEMON)
3081 ret = ip_vs_genl_new_daemon(daemon_attrs);
3082 else
3083 ret = ip_vs_genl_del_daemon(daemon_attrs);
3084 goto out;
3085 } else if (cmd == IPVS_CMD_ZERO &&
3086 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3087 ret = ip_vs_zero_all();
3088 goto out;
3089 }
3090
3091 /* All following commands require a service argument, so check if we
3092 * received a valid one. We need a full service specification when
3093 * adding / editing a service. Only identifying members otherwise. */
3094 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3095 need_full_svc = 1;
3096
3097 ret = ip_vs_genl_parse_service(&usvc,
3098 info->attrs[IPVS_CMD_ATTR_SERVICE],
3099 need_full_svc);
3100 if (ret)
3101 goto out;
3102
3103 /* Lookup the exact service by <protocol, addr, port> or fwmark */
3104 if (usvc.fwmark == 0)
b18610de
JV
3105 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
3106 &usvc.addr, usvc.port);
9a812198 3107 else
b18610de 3108 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
9a812198
JV
3109
3110 /* Unless we're adding a new service, the service must already exist */
3111 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3112 ret = -ESRCH;
3113 goto out;
3114 }
3115
3116 /* Destination commands require a valid destination argument. For
3117 * adding / editing a destination, we need a full destination
3118 * specification. */
3119 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3120 cmd == IPVS_CMD_DEL_DEST) {
3121 if (cmd != IPVS_CMD_DEL_DEST)
3122 need_full_dest = 1;
3123
3124 ret = ip_vs_genl_parse_dest(&udest,
3125 info->attrs[IPVS_CMD_ATTR_DEST],
3126 need_full_dest);
3127 if (ret)
3128 goto out;
3129 }
3130
3131 switch (cmd) {
3132 case IPVS_CMD_NEW_SERVICE:
3133 if (svc == NULL)
3134 ret = ip_vs_add_service(&usvc, &svc);
3135 else
3136 ret = -EEXIST;
3137 break;
3138 case IPVS_CMD_SET_SERVICE:
3139 ret = ip_vs_edit_service(svc, &usvc);
3140 break;
3141 case IPVS_CMD_DEL_SERVICE:
3142 ret = ip_vs_del_service(svc);
3143 break;
3144 case IPVS_CMD_NEW_DEST:
3145 ret = ip_vs_add_dest(svc, &udest);
3146 break;
3147 case IPVS_CMD_SET_DEST:
3148 ret = ip_vs_edit_dest(svc, &udest);
3149 break;
3150 case IPVS_CMD_DEL_DEST:
3151 ret = ip_vs_del_dest(svc, &udest);
3152 break;
3153 case IPVS_CMD_ZERO:
3154 ret = ip_vs_zero_service(svc);
3155 break;
3156 default:
3157 ret = -EINVAL;
3158 }
3159
3160out:
3161 if (svc)
3162 ip_vs_service_put(svc);
3163 mutex_unlock(&__ip_vs_mutex);
3164
3165 return ret;
3166}
3167
3168static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3169{
3170 struct sk_buff *msg;
3171 void *reply;
3172 int ret, cmd, reply_cmd;
3173
3174 cmd = info->genlhdr->cmd;
3175
3176 if (cmd == IPVS_CMD_GET_SERVICE)
3177 reply_cmd = IPVS_CMD_NEW_SERVICE;
3178 else if (cmd == IPVS_CMD_GET_INFO)
3179 reply_cmd = IPVS_CMD_SET_INFO;
3180 else if (cmd == IPVS_CMD_GET_CONFIG)
3181 reply_cmd = IPVS_CMD_SET_CONFIG;
3182 else {
3183 IP_VS_ERR("unknown Generic Netlink command\n");
3184 return -EINVAL;
3185 }
3186
3187 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3188 if (!msg)
3189 return -ENOMEM;
3190
3191 mutex_lock(&__ip_vs_mutex);
3192
3193 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3194 if (reply == NULL)
3195 goto nla_put_failure;
3196
3197 switch (cmd) {
3198 case IPVS_CMD_GET_SERVICE:
3199 {
3200 struct ip_vs_service *svc;
3201
3202 svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
3203 if (IS_ERR(svc)) {
3204 ret = PTR_ERR(svc);
3205 goto out_err;
3206 } else if (svc) {
3207 ret = ip_vs_genl_fill_service(msg, svc);
3208 ip_vs_service_put(svc);
3209 if (ret)
3210 goto nla_put_failure;
3211 } else {
3212 ret = -ESRCH;
3213 goto out_err;
3214 }
3215
3216 break;
3217 }
3218
3219 case IPVS_CMD_GET_CONFIG:
3220 {
3221 struct ip_vs_timeout_user t;
3222
3223 __ip_vs_get_timeouts(&t);
3224#ifdef CONFIG_IP_VS_PROTO_TCP
3225 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3226 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3227 t.tcp_fin_timeout);
3228#endif
3229#ifdef CONFIG_IP_VS_PROTO_UDP
3230 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3231#endif
3232
3233 break;
3234 }
3235
3236 case IPVS_CMD_GET_INFO:
3237 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3238 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3239 IP_VS_CONN_TAB_SIZE);
3240 break;
3241 }
3242
3243 genlmsg_end(msg, reply);
3244 ret = genlmsg_unicast(msg, info->snd_pid);
3245 goto out;
3246
3247nla_put_failure:
3248 IP_VS_ERR("not enough space in Netlink message\n");
3249 ret = -EMSGSIZE;
3250
3251out_err:
3252 nlmsg_free(msg);
3253out:
3254 mutex_unlock(&__ip_vs_mutex);
3255
3256 return ret;
3257}
3258
3259
3260static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3261 {
3262 .cmd = IPVS_CMD_NEW_SERVICE,
3263 .flags = GENL_ADMIN_PERM,
3264 .policy = ip_vs_cmd_policy,
3265 .doit = ip_vs_genl_set_cmd,
3266 },
3267 {
3268 .cmd = IPVS_CMD_SET_SERVICE,
3269 .flags = GENL_ADMIN_PERM,
3270 .policy = ip_vs_cmd_policy,
3271 .doit = ip_vs_genl_set_cmd,
3272 },
3273 {
3274 .cmd = IPVS_CMD_DEL_SERVICE,
3275 .flags = GENL_ADMIN_PERM,
3276 .policy = ip_vs_cmd_policy,
3277 .doit = ip_vs_genl_set_cmd,
3278 },
3279 {
3280 .cmd = IPVS_CMD_GET_SERVICE,
3281 .flags = GENL_ADMIN_PERM,
3282 .doit = ip_vs_genl_get_cmd,
3283 .dumpit = ip_vs_genl_dump_services,
3284 .policy = ip_vs_cmd_policy,
3285 },
3286 {
3287 .cmd = IPVS_CMD_NEW_DEST,
3288 .flags = GENL_ADMIN_PERM,
3289 .policy = ip_vs_cmd_policy,
3290 .doit = ip_vs_genl_set_cmd,
3291 },
3292 {
3293 .cmd = IPVS_CMD_SET_DEST,
3294 .flags = GENL_ADMIN_PERM,
3295 .policy = ip_vs_cmd_policy,
3296 .doit = ip_vs_genl_set_cmd,
3297 },
3298 {
3299 .cmd = IPVS_CMD_DEL_DEST,
3300 .flags = GENL_ADMIN_PERM,
3301 .policy = ip_vs_cmd_policy,
3302 .doit = ip_vs_genl_set_cmd,
3303 },
3304 {
3305 .cmd = IPVS_CMD_GET_DEST,
3306 .flags = GENL_ADMIN_PERM,
3307 .policy = ip_vs_cmd_policy,
3308 .dumpit = ip_vs_genl_dump_dests,
3309 },
3310 {
3311 .cmd = IPVS_CMD_NEW_DAEMON,
3312 .flags = GENL_ADMIN_PERM,
3313 .policy = ip_vs_cmd_policy,
3314 .doit = ip_vs_genl_set_cmd,
3315 },
3316 {
3317 .cmd = IPVS_CMD_DEL_DAEMON,
3318 .flags = GENL_ADMIN_PERM,
3319 .policy = ip_vs_cmd_policy,
3320 .doit = ip_vs_genl_set_cmd,
3321 },
3322 {
3323 .cmd = IPVS_CMD_GET_DAEMON,
3324 .flags = GENL_ADMIN_PERM,
3325 .dumpit = ip_vs_genl_dump_daemons,
3326 },
3327 {
3328 .cmd = IPVS_CMD_SET_CONFIG,
3329 .flags = GENL_ADMIN_PERM,
3330 .policy = ip_vs_cmd_policy,
3331 .doit = ip_vs_genl_set_cmd,
3332 },
3333 {
3334 .cmd = IPVS_CMD_GET_CONFIG,
3335 .flags = GENL_ADMIN_PERM,
3336 .doit = ip_vs_genl_get_cmd,
3337 },
3338 {
3339 .cmd = IPVS_CMD_GET_INFO,
3340 .flags = GENL_ADMIN_PERM,
3341 .doit = ip_vs_genl_get_cmd,
3342 },
3343 {
3344 .cmd = IPVS_CMD_ZERO,
3345 .flags = GENL_ADMIN_PERM,
3346 .policy = ip_vs_cmd_policy,
3347 .doit = ip_vs_genl_set_cmd,
3348 },
3349 {
3350 .cmd = IPVS_CMD_FLUSH,
3351 .flags = GENL_ADMIN_PERM,
3352 .doit = ip_vs_genl_set_cmd,
3353 },
3354};
3355
3356static int __init ip_vs_genl_register(void)
3357{
3358 int ret, i;
3359
3360 ret = genl_register_family(&ip_vs_genl_family);
3361 if (ret)
3362 return ret;
3363
3364 for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
3365 ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
3366 if (ret)
3367 goto err_out;
3368 }
3369 return 0;
3370
3371err_out:
3372 genl_unregister_family(&ip_vs_genl_family);
3373 return ret;
3374}
3375
3376static void ip_vs_genl_unregister(void)
3377{
3378 genl_unregister_family(&ip_vs_genl_family);
3379}
3380
3381/* End of Generic Netlink interface definitions */
3382
1da177e4 3383
048cf48b 3384int __init ip_vs_control_init(void)
1da177e4
LT
3385{
3386 int ret;
3387 int idx;
3388
3389 EnterFunction(2);
3390
3391 ret = nf_register_sockopt(&ip_vs_sockopts);
3392 if (ret) {
3393 IP_VS_ERR("cannot register sockopt.\n");
3394 return ret;
3395 }
3396
9a812198
JV
3397 ret = ip_vs_genl_register();
3398 if (ret) {
3399 IP_VS_ERR("cannot register Generic Netlink interface.\n");
3400 nf_unregister_sockopt(&ip_vs_sockopts);
3401 return ret;
3402 }
3403
457c4cbc
EB
3404 proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
3405 proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
1da177e4 3406
90754f8e 3407 sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
1da177e4
LT
3408
3409 /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3410 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3411 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3412 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3413 }
3414 for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) {
3415 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3416 }
3417
1da177e4
LT
3418 ip_vs_new_estimator(&ip_vs_stats);
3419
3420 /* Hook the defense timer */
3421 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3422
3423 LeaveFunction(2);
3424 return 0;
3425}
3426
3427
3428void ip_vs_control_cleanup(void)
3429{
3430 EnterFunction(2);
3431 ip_vs_trash_cleanup();
3432 cancel_rearming_delayed_work(&defense_work);
28e53bdd 3433 cancel_work_sync(&defense_work.work);
1da177e4
LT
3434 ip_vs_kill_estimator(&ip_vs_stats);
3435 unregister_sysctl_table(sysctl_header);
457c4cbc
EB
3436 proc_net_remove(&init_net, "ip_vs_stats");
3437 proc_net_remove(&init_net, "ip_vs");
9a812198 3438 ip_vs_genl_unregister();
1da177e4
LT
3439 nf_unregister_sockopt(&ip_vs_sockopts);
3440 LeaveFunction(2);
3441}