net/ipv4/ipvs/ip_vs_ctl.c
1/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
21#include <linux/module.h>
22#include <linux/init.h>
23#include <linux/types.h>
24#include <linux/capability.h>
25#include <linux/fs.h>
26#include <linux/sysctl.h>
27#include <linux/proc_fs.h>
28#include <linux/workqueue.h>
29#include <linux/swap.h>
30#include <linux/seq_file.h>
31
32#include <linux/netfilter.h>
33#include <linux/netfilter_ipv4.h>
34#include <linux/mutex.h>
35
36#include <net/net_namespace.h>
37#include <net/ip.h>
38#include <net/route.h>
39#include <net/sock.h>
40#include <net/genetlink.h>
41
42#include <asm/uaccess.h>
43
44#include <net/ip_vs.h>
45
46/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
47static DEFINE_MUTEX(__ip_vs_mutex);
48
49/* lock for service table */
50static DEFINE_RWLOCK(__ip_vs_svc_lock);
51
52/* lock for table with the real services */
53static DEFINE_RWLOCK(__ip_vs_rs_lock);
54
55/* lock for state and timeout tables */
56static DEFINE_RWLOCK(__ip_vs_securetcp_lock);
57
58/* lock for drop entry handling */
59static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
60
61/* lock for drop packet handling */
62static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
63
64/* 1/rate drop and drop-entry variables */
65int ip_vs_drop_rate = 0;
66int ip_vs_drop_counter = 0;
67static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
68
69/* number of virtual services */
70static int ip_vs_num_services = 0;
71
72/* sysctl variables */
73static int sysctl_ip_vs_drop_entry = 0;
74static int sysctl_ip_vs_drop_packet = 0;
75static int sysctl_ip_vs_secure_tcp = 0;
76static int sysctl_ip_vs_amemthresh = 1024;
77static int sysctl_ip_vs_am_droprate = 10;
78int sysctl_ip_vs_cache_bypass = 0;
79int sysctl_ip_vs_expire_nodest_conn = 0;
80int sysctl_ip_vs_expire_quiescent_template = 0;
81int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
82int sysctl_ip_vs_nat_icmp_send = 0;
83
84
85#ifdef CONFIG_IP_VS_DEBUG
86static int sysctl_ip_vs_debug_level = 0;
87
88int ip_vs_get_debug_level(void)
89{
90 return sysctl_ip_vs_debug_level;
91}
92#endif
93
94/*
95 * update_defense_level is called from keventd and from sysctl,
96 * so it needs to protect itself from softirqs
97 */
98static void update_defense_level(void)
99{
100 struct sysinfo i;
101 static int old_secure_tcp = 0;
102 int availmem;
103 int nomem;
104 int to_change = -1;
105
106 /* we only count free and buffered memory (in pages) */
107 si_meminfo(&i);
108 availmem = i.freeram + i.bufferram;
109 /* however in linux 2.5 the i.bufferram is total page cache size,
110 we need to adjust it */
111 /* si_swapinfo(&i); */
112 /* availmem = availmem - (i.totalswap - i.freeswap); */
113
114 nomem = (availmem < sysctl_ip_vs_amemthresh);
115
116 local_bh_disable();
117
118 /* drop_entry */
119 spin_lock(&__ip_vs_dropentry_lock);
120 switch (sysctl_ip_vs_drop_entry) {
121 case 0:
122 atomic_set(&ip_vs_dropentry, 0);
123 break;
124 case 1:
125 if (nomem) {
126 atomic_set(&ip_vs_dropentry, 1);
127 sysctl_ip_vs_drop_entry = 2;
128 } else {
129 atomic_set(&ip_vs_dropentry, 0);
130 }
131 break;
132 case 2:
133 if (nomem) {
134 atomic_set(&ip_vs_dropentry, 1);
135 } else {
136 atomic_set(&ip_vs_dropentry, 0);
137 sysctl_ip_vs_drop_entry = 1;
138 };
139 break;
140 case 3:
141 atomic_set(&ip_vs_dropentry, 1);
142 break;
143 }
144 spin_unlock(&__ip_vs_dropentry_lock);
145
146 /* drop_packet */
147 spin_lock(&__ip_vs_droppacket_lock);
148 switch (sysctl_ip_vs_drop_packet) {
149 case 0:
150 ip_vs_drop_rate = 0;
151 break;
152 case 1:
153 if (nomem) {
154 ip_vs_drop_rate = ip_vs_drop_counter
155 = sysctl_ip_vs_amemthresh /
156 (sysctl_ip_vs_amemthresh-availmem);
157 sysctl_ip_vs_drop_packet = 2;
158 } else {
159 ip_vs_drop_rate = 0;
160 }
161 break;
162 case 2:
163 if (nomem) {
164 ip_vs_drop_rate = ip_vs_drop_counter
165 = sysctl_ip_vs_amemthresh /
166 (sysctl_ip_vs_amemthresh-availmem);
167 } else {
168 ip_vs_drop_rate = 0;
169 sysctl_ip_vs_drop_packet = 1;
170 }
171 break;
172 case 3:
173 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
174 break;
175 }
176 spin_unlock(&__ip_vs_droppacket_lock);
177
178 /* secure_tcp */
179 write_lock(&__ip_vs_securetcp_lock);
180 switch (sysctl_ip_vs_secure_tcp) {
181 case 0:
182 if (old_secure_tcp >= 2)
183 to_change = 0;
184 break;
185 case 1:
186 if (nomem) {
187 if (old_secure_tcp < 2)
188 to_change = 1;
189 sysctl_ip_vs_secure_tcp = 2;
190 } else {
191 if (old_secure_tcp >= 2)
192 to_change = 0;
193 }
194 break;
195 case 2:
196 if (nomem) {
197 if (old_secure_tcp < 2)
198 to_change = 1;
199 } else {
200 if (old_secure_tcp >= 2)
201 to_change = 0;
202 sysctl_ip_vs_secure_tcp = 1;
203 }
204 break;
205 case 3:
206 if (old_secure_tcp < 2)
207 to_change = 1;
208 break;
209 }
210 old_secure_tcp = sysctl_ip_vs_secure_tcp;
211 if (to_change >= 0)
212 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
213 write_unlock(&__ip_vs_securetcp_lock);
214
215 local_bh_enable();
216}
217
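A worked example of the drop_packet rate computed above (illustrative only, not part of the source): with the default sysctl_ip_vs_amemthresh of 1024 pages and 960 pages still available, ip_vs_drop_rate = 1024 / (1024 - 960) = 16, so under the "1/rate drop" scheme roughly one packet in sixteen is dropped. As available memory falls toward zero the ratio approaches 1024 / 1024 = 1, i.e. every packet is dropped.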
218
219/*
220 * Timer for checking the defense
221 */
222#define DEFENSE_TIMER_PERIOD 1*HZ
223static void defense_work_handler(struct work_struct *work);
224static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
225
226static void defense_work_handler(struct work_struct *work)
227{
228 update_defense_level();
229 if (atomic_read(&ip_vs_dropentry))
230 ip_vs_random_dropentry();
231
232 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
233}
234
235int
236ip_vs_use_count_inc(void)
237{
238 return try_module_get(THIS_MODULE);
239}
240
241void
242ip_vs_use_count_dec(void)
243{
244 module_put(THIS_MODULE);
245}
246
247
248/*
249 * Hash table: for virtual service lookups
250 */
251#define IP_VS_SVC_TAB_BITS 8
252#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
253#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
254
255/* the service table hashed by <protocol, addr, port> */
256static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
257/* the service table hashed by fwmark */
258static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
259
260/*
261 * Hash table: for real service lookups
262 */
263#define IP_VS_RTAB_BITS 4
264#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
265#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
266
267static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
268
269/*
270 * Trash for destinations
271 */
272static LIST_HEAD(ip_vs_dest_trash);
273
274/*
275 * FTP & NULL virtual service counters
276 */
277static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
278static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
279
280
281/*
282 * Returns hash value for virtual service
283 */
284static __inline__ unsigned
285ip_vs_svc_hashkey(unsigned proto, __be32 addr, __be16 port)
286{
287 register unsigned porth = ntohs(port);
288
289 return (proto^ntohl(addr)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
290 & IP_VS_SVC_TAB_MASK;
291}
292
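As a worked example of the hash above (illustrative only): a TCP (protocol 6) virtual service on 192.168.10.1 port 80 gives (6 ^ 0xC0A80A01 ^ (80 >> 8) ^ 80) & 0xFF = 0x57, i.e. bucket 87 of the 256-entry ip_vs_svc_table.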
293/*
294 * Returns hash value of fwmark for virtual service lookup
295 */
296static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
297{
298 return fwmark & IP_VS_SVC_TAB_MASK;
299}
300
301/*
302 * Hashes a service in the ip_vs_svc_table by <proto,addr,port>
303 * or in the ip_vs_svc_fwm_table by fwmark.
304 * Should be called with locked tables.
305 */
306static int ip_vs_svc_hash(struct ip_vs_service *svc)
307{
308 unsigned hash;
309
310 if (svc->flags & IP_VS_SVC_F_HASHED) {
311 IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
312 "called from %p\n", __builtin_return_address(0));
313 return 0;
314 }
315
316 if (svc->fwmark == 0) {
317 /*
318 * Hash it by <protocol,addr,port> in ip_vs_svc_table
319 */
320 hash = ip_vs_svc_hashkey(svc->protocol, svc->addr.ip,
321 svc->port);
322 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
323 } else {
324 /*
325 * Hash it by fwmark in ip_vs_svc_fwm_table
326 */
327 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
328 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
329 }
330
331 svc->flags |= IP_VS_SVC_F_HASHED;
332 /* increase its refcnt because it is referenced by the svc table */
333 atomic_inc(&svc->refcnt);
334 return 1;
335}
336
337
338/*
339 * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
340 * Should be called with locked tables.
341 */
342static int ip_vs_svc_unhash(struct ip_vs_service *svc)
343{
344 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
345 IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
346 "called from %p\n", __builtin_return_address(0));
347 return 0;
348 }
349
350 if (svc->fwmark == 0) {
351 /* Remove it from the ip_vs_svc_table table */
352 list_del(&svc->s_list);
353 } else {
354 /* Remove it from the ip_vs_svc_fwm_table table */
355 list_del(&svc->f_list);
356 }
357
358 svc->flags &= ~IP_VS_SVC_F_HASHED;
359 atomic_dec(&svc->refcnt);
360 return 1;
361}
362
363
364/*
365 * Get service by {proto,addr,port} in the service table.
366 */
367static __inline__ struct ip_vs_service *
368__ip_vs_service_get(__u16 protocol, __be32 vaddr, __be16 vport)
369{
370 unsigned hash;
371 struct ip_vs_service *svc;
372
373 /* Check for "full" addressed entries */
374 hash = ip_vs_svc_hashkey(protocol, vaddr, vport);
375
376 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
377 if ((svc->addr.ip == vaddr)
378 && (svc->port == vport)
379 && (svc->protocol == protocol)) {
380 /* HIT */
381 atomic_inc(&svc->usecnt);
382 return svc;
383 }
384 }
385
386 return NULL;
387}
388
389
390/*
391 * Get service by {fwmark} in the service table.
392 */
393static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark)
394{
395 unsigned hash;
396 struct ip_vs_service *svc;
397
398 /* Check for fwmark addressed entries */
399 hash = ip_vs_svc_fwm_hashkey(fwmark);
400
401 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
402 if (svc->fwmark == fwmark) {
403 /* HIT */
404 atomic_inc(&svc->usecnt);
405 return svc;
406 }
407 }
408
409 return NULL;
410}
411
412struct ip_vs_service *
413ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport)
414{
415 struct ip_vs_service *svc;
416
417 read_lock(&__ip_vs_svc_lock);
418
419 /*
420 * Check the table hashed by fwmark first
421 */
422 if (fwmark && (svc = __ip_vs_svc_fwm_get(fwmark)))
423 goto out;
424
425 /*
426 * Check the table hashed by <protocol,addr,port>
427 * for "full" addressed entries
428 */
429 svc = __ip_vs_service_get(protocol, vaddr, vport);
430
431 if (svc == NULL
432 && protocol == IPPROTO_TCP
433 && atomic_read(&ip_vs_ftpsvc_counter)
434 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
435 /*
436 * Check if ftp service entry exists, the packet
437 * might belong to FTP data connections.
438 */
439 svc = __ip_vs_service_get(protocol, vaddr, FTPPORT);
440 }
441
442 if (svc == NULL
443 && atomic_read(&ip_vs_nullsvc_counter)) {
444 /*
445 * Check if the catch-all port (port zero) exists
446 */
447 svc = __ip_vs_service_get(protocol, vaddr, 0);
448 }
449
450 out:
451 read_unlock(&__ip_vs_svc_lock);
452
453 IP_VS_DBG(9, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n",
454 fwmark, ip_vs_proto_name(protocol),
455 NIPQUAD(vaddr), ntohs(vport),
456 svc?"hit":"not hit");
457
458 return svc;
459}
460
461
462static inline void
463__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
464{
465 atomic_inc(&svc->refcnt);
466 dest->svc = svc;
467}
468
469static inline void
470__ip_vs_unbind_svc(struct ip_vs_dest *dest)
471{
472 struct ip_vs_service *svc = dest->svc;
473
474 dest->svc = NULL;
475 if (atomic_dec_and_test(&svc->refcnt))
476 kfree(svc);
477}
478
479
480/*
481 * Returns hash value for real service
482 */
483static __inline__ unsigned ip_vs_rs_hashkey(__be32 addr, __be16 port)
484{
485 register unsigned porth = ntohs(port);
486
487 return (ntohl(addr)^(porth>>IP_VS_RTAB_BITS)^porth)
488 & IP_VS_RTAB_MASK;
489}
490
491/*
492 * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
493 * should be called with locked tables.
494 */
495static int ip_vs_rs_hash(struct ip_vs_dest *dest)
496{
497 unsigned hash;
498
499 if (!list_empty(&dest->d_list)) {
500 return 0;
501 }
502
503 /*
504 * Hash by proto,addr,port,
505 * which are the parameters of the real service.
506 */
507 hash = ip_vs_rs_hashkey(dest->addr.ip, dest->port);
508 list_add(&dest->d_list, &ip_vs_rtable[hash]);
509
510 return 1;
511}
512
513/*
514 * UNhashes ip_vs_dest from ip_vs_rtable.
515 * should be called with locked tables.
516 */
517static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
518{
519 /*
520 * Remove it from the ip_vs_rtable table.
521 */
522 if (!list_empty(&dest->d_list)) {
523 list_del(&dest->d_list);
524 INIT_LIST_HEAD(&dest->d_list);
525 }
526
527 return 1;
528}
529
530/*
531 * Lookup real service by <proto,addr,port> in the real service table.
532 */
533struct ip_vs_dest *
534ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport)
535{
536 unsigned hash;
537 struct ip_vs_dest *dest;
538
539 /*
540 * Check for "full" addressed entries
541 * Return the first found entry
542 */
543 hash = ip_vs_rs_hashkey(daddr, dport);
544
545 read_lock(&__ip_vs_rs_lock);
546 list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
547 if ((dest->addr.ip == daddr)
548 && (dest->port == dport)
549 && ((dest->protocol == protocol) ||
550 dest->vfwmark)) {
551 /* HIT */
552 read_unlock(&__ip_vs_rs_lock);
553 return dest;
554 }
555 }
556 read_unlock(&__ip_vs_rs_lock);
557
558 return NULL;
559}
560
561/*
562 * Lookup destination by {addr,port} in the given service
563 */
564static struct ip_vs_dest *
565ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
566{
567 struct ip_vs_dest *dest;
568
569 /*
570 * Find the destination for the given service
571 */
572 list_for_each_entry(dest, &svc->destinations, n_list) {
573 if ((dest->addr.ip == daddr) && (dest->port == dport)) {
574 /* HIT */
575 return dest;
576 }
577 }
578
579 return NULL;
580}
581
582/*
583 * Find destination by {daddr,dport,vaddr,protocol}
584 * Created to be used in ip_vs_process_message() in
585 * the backup synchronization daemon. It finds the
586 * destination to be bound to the received connection
587 * on the backup.
588 *
589 * ip_vs_lookup_real_service() looked promising, but
590 * does not seem to work as expected.
591 */
592struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport,
593 __be32 vaddr, __be16 vport, __u16 protocol)
594{
595 struct ip_vs_dest *dest;
596 struct ip_vs_service *svc;
597
598 svc = ip_vs_service_get(0, protocol, vaddr, vport);
599 if (!svc)
600 return NULL;
601 dest = ip_vs_lookup_dest(svc, daddr, dport);
602 if (dest)
603 atomic_inc(&dest->refcnt);
604 ip_vs_service_put(svc);
605 return dest;
606}
607
608/*
609 * Lookup dest by {svc,addr,port} in the destination trash.
610 * The destination trash is used to hold the destinations that are removed
611 * from the service table but are still referenced by some conn entries.
612 * The reason to add the destination trash is when the dest is temporary
613 * down (either by administrator or by monitor program), the dest can be
614 * picked back from the trash, the remaining connections to the dest can
615 * continue, and the counting information of the dest is also useful for
616 * scheduling.
617 */
618static struct ip_vs_dest *
619ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
620{
621 struct ip_vs_dest *dest, *nxt;
622
623 /*
624 * Find the destination in trash
625 */
626 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
627 IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, "
628 "dest->refcnt=%d\n",
629 dest->vfwmark,
630 NIPQUAD(dest->addr.ip), ntohs(dest->port),
631 atomic_read(&dest->refcnt));
632 if (dest->addr.ip == daddr &&
633 dest->port == dport &&
634 dest->vfwmark == svc->fwmark &&
635 dest->protocol == svc->protocol &&
636 (svc->fwmark ||
637 (dest->vaddr.ip == svc->addr.ip &&
638 dest->vport == svc->port))) {
639 /* HIT */
640 return dest;
641 }
642
643 /*
644 * Try to purge the destination from trash if not referenced
645 */
646 if (atomic_read(&dest->refcnt) == 1) {
647 IP_VS_DBG(3, "Removing destination %u/%u.%u.%u.%u:%u "
648 "from trash\n",
649 dest->vfwmark,
650 NIPQUAD(dest->addr.ip), ntohs(dest->port));
651 list_del(&dest->n_list);
652 ip_vs_dst_reset(dest);
653 __ip_vs_unbind_svc(dest);
654 kfree(dest);
655 }
656 }
657
658 return NULL;
659}
660
661
662/*
663 * Clean up all the destinations in the trash
664 * Called by the ip_vs_control_cleanup()
665 *
666 * When the ip_vs_control_cleanup is activated by ipvs module exit,
667 * the service tables must have been flushed and all the connections
668 * are expired, and the refcnt of each destination in the trash must
669 * be 1, so we simply release them here.
670 */
671static void ip_vs_trash_cleanup(void)
672{
673 struct ip_vs_dest *dest, *nxt;
674
675 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
676 list_del(&dest->n_list);
677 ip_vs_dst_reset(dest);
678 __ip_vs_unbind_svc(dest);
679 kfree(dest);
680 }
681}
682
683
684static void
685ip_vs_zero_stats(struct ip_vs_stats *stats)
686{
687 spin_lock_bh(&stats->lock);
688
689 stats->conns = 0;
690 stats->inpkts = 0;
691 stats->outpkts = 0;
692 stats->inbytes = 0;
693 stats->outbytes = 0;
694
695 stats->cps = 0;
696 stats->inpps = 0;
697 stats->outpps = 0;
698 stats->inbps = 0;
699 stats->outbps = 0;
700
701 ip_vs_zero_estimator(stats);
702
703 spin_unlock_bh(&stats->lock);
704}
705
706/*
707 * Update a destination in the given service
708 */
709static void
710__ip_vs_update_dest(struct ip_vs_service *svc,
711 struct ip_vs_dest *dest, struct ip_vs_dest_user *udest)
712{
713 int conn_flags;
714
715 /* set the weight and the flags */
716 atomic_set(&dest->weight, udest->weight);
717 conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
718
719 /* check if local node and update the flags */
720 if (inet_addr_type(&init_net, udest->addr) == RTN_LOCAL) {
721 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
722 | IP_VS_CONN_F_LOCALNODE;
723 }
724
725 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
726 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
727 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
728 } else {
729 /*
730 * Put the real service in ip_vs_rtable if not present.
731 * For now only for NAT!
732 */
733 write_lock_bh(&__ip_vs_rs_lock);
734 ip_vs_rs_hash(dest);
735 write_unlock_bh(&__ip_vs_rs_lock);
736 }
737 atomic_set(&dest->conn_flags, conn_flags);
738
739 /* bind the service */
740 if (!dest->svc) {
741 __ip_vs_bind_svc(dest, svc);
742 } else {
743 if (dest->svc != svc) {
744 __ip_vs_unbind_svc(dest);
745 ip_vs_zero_stats(&dest->stats);
746 __ip_vs_bind_svc(dest, svc);
747 }
748 }
749
750 /* set the dest status flags */
751 dest->flags |= IP_VS_DEST_F_AVAILABLE;
752
753 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
754 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
755 dest->u_threshold = udest->u_threshold;
756 dest->l_threshold = udest->l_threshold;
757}
758
759
760/*
761 * Create a destination for the given service
762 */
763static int
764ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest,
765 struct ip_vs_dest **dest_p)
766{
767 struct ip_vs_dest *dest;
768 unsigned atype;
769
770 EnterFunction(2);
771
772 atype = inet_addr_type(&init_net, udest->addr);
773 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
774 return -EINVAL;
775
776 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
777 if (dest == NULL) {
778 IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
779 return -ENOMEM;
780 }
781
782 dest->protocol = svc->protocol;
783 dest->vaddr.ip = svc->addr.ip;
784 dest->vport = svc->port;
785 dest->vfwmark = svc->fwmark;
786 dest->addr.ip = udest->addr;
787 dest->port = udest->port;
788
789 atomic_set(&dest->activeconns, 0);
790 atomic_set(&dest->inactconns, 0);
791 atomic_set(&dest->persistconns, 0);
792 atomic_set(&dest->refcnt, 0);
793
794 INIT_LIST_HEAD(&dest->d_list);
795 spin_lock_init(&dest->dst_lock);
796 spin_lock_init(&dest->stats.lock);
797 __ip_vs_update_dest(svc, dest, udest);
798 ip_vs_new_estimator(&dest->stats);
799
800 *dest_p = dest;
801
802 LeaveFunction(2);
803 return 0;
804}
805
806
807/*
808 * Add a destination into an existing service
809 */
810static int
811ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
812{
813 struct ip_vs_dest *dest;
814 __be32 daddr = udest->addr;
815 __be16 dport = udest->port;
816 int ret;
817
818 EnterFunction(2);
819
820 if (udest->weight < 0) {
821 IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
822 return -ERANGE;
823 }
824
825 if (udest->l_threshold > udest->u_threshold) {
826 IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than "
827 "upper threshold\n");
828 return -ERANGE;
829 }
830
831 /*
832 * Check if the dest already exists in the list
833 */
834 dest = ip_vs_lookup_dest(svc, daddr, dport);
835 if (dest != NULL) {
836 IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
837 return -EEXIST;
838 }
839
840 /*
841 * Check if the dest already exists in the trash and
842 * is from the same service
843 */
844 dest = ip_vs_trash_get_dest(svc, daddr, dport);
845 if (dest != NULL) {
846 IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
847 "dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
848 NIPQUAD(daddr), ntohs(dport),
849 atomic_read(&dest->refcnt),
850 dest->vfwmark,
851 NIPQUAD(dest->vaddr.ip),
852 ntohs(dest->vport));
853 __ip_vs_update_dest(svc, dest, udest);
854
855 /*
856 * Get the destination from the trash
857 */
858 list_del(&dest->n_list);
859
860 ip_vs_new_estimator(&dest->stats);
861
862 write_lock_bh(&__ip_vs_svc_lock);
863
864 /*
865 * Wait until all other svc users go away.
866 */
867 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
868
869 list_add(&dest->n_list, &svc->destinations);
870 svc->num_dests++;
871
872 /* call the update_service function of its scheduler */
873 if (svc->scheduler->update_service)
874 svc->scheduler->update_service(svc);
875
876 write_unlock_bh(&__ip_vs_svc_lock);
877 return 0;
878 }
879
880 /*
881 * Allocate and initialize the dest structure
882 */
883 ret = ip_vs_new_dest(svc, udest, &dest);
884 if (ret) {
885 return ret;
886 }
887
888 /*
889 * Add the dest entry into the list
890 */
891 atomic_inc(&dest->refcnt);
892
893 write_lock_bh(&__ip_vs_svc_lock);
894
895 /*
896 * Wait until all other svc users go away.
897 */
898 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
899
900 list_add(&dest->n_list, &svc->destinations);
901 svc->num_dests++;
902
903 /* call the update_service function of its scheduler */
904 if (svc->scheduler->update_service)
905 svc->scheduler->update_service(svc);
906
907 write_unlock_bh(&__ip_vs_svc_lock);
908
909 LeaveFunction(2);
910
911 return 0;
912}
913
914
915/*
916 * Edit a destination in the given service
917 */
918static int
919ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
920{
921 struct ip_vs_dest *dest;
922 __be32 daddr = udest->addr;
923 __be16 dport = udest->port;
924
925 EnterFunction(2);
926
927 if (udest->weight < 0) {
928 IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
929 return -ERANGE;
930 }
931
932 if (udest->l_threshold > udest->u_threshold) {
933 IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than "
934 "upper threshold\n");
935 return -ERANGE;
936 }
937
938 /*
939 * Lookup the destination list
940 */
941 dest = ip_vs_lookup_dest(svc, daddr, dport);
942 if (dest == NULL) {
943 IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
944 return -ENOENT;
945 }
946
947 __ip_vs_update_dest(svc, dest, udest);
948
949 write_lock_bh(&__ip_vs_svc_lock);
950
951 /* Wait until all other svc users go away */
952 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
953
954 /* call the update_service, because server weight may be changed */
955 if (svc->scheduler->update_service)
956 svc->scheduler->update_service(svc);
957
958 write_unlock_bh(&__ip_vs_svc_lock);
959
960 LeaveFunction(2);
961
962 return 0;
963}
964
965
966/*
967 * Delete a destination (must be already unlinked from the service)
968 */
969static void __ip_vs_del_dest(struct ip_vs_dest *dest)
970{
971 ip_vs_kill_estimator(&dest->stats);
972
973 /*
974 * Remove it from the d-linked list with the real services.
975 */
976 write_lock_bh(&__ip_vs_rs_lock);
977 ip_vs_rs_unhash(dest);
978 write_unlock_bh(&__ip_vs_rs_lock);
979
980 /*
981 * Decrease the refcnt of the dest, and free the dest
982 * if nobody refers to it (refcnt=0). Otherwise, throw
983 * the destination into the trash.
984 */
985 if (atomic_dec_and_test(&dest->refcnt)) {
986 ip_vs_dst_reset(dest);
987 /* simply decrease svc->refcnt here, let the caller check
988 and release the service if nobody refers to it.
989 Only user context can release destination and service,
990 and only one user context can update virtual service at a
991 time, so the operation here is OK */
992 atomic_dec(&dest->svc->refcnt);
993 kfree(dest);
994 } else {
995 IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, "
996 "dest->refcnt=%d\n",
997 NIPQUAD(dest->addr.ip), ntohs(dest->port),
998 atomic_read(&dest->refcnt));
999 list_add(&dest->n_list, &ip_vs_dest_trash);
1000 atomic_inc(&dest->refcnt);
1001 }
1002}
1003
1004
1005/*
1006 * Unlink a destination from the given service
1007 */
1008static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1009 struct ip_vs_dest *dest,
1010 int svcupd)
1011{
1012 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1013
1014 /*
1015 * Remove it from the d-linked destination list.
1016 */
1017 list_del(&dest->n_list);
1018 svc->num_dests--;
1019
1020 /*
1021 * Call the update_service function of its scheduler
1022 */
1023 if (svcupd && svc->scheduler->update_service)
1024 svc->scheduler->update_service(svc);
1025}
1026
1027
1028/*
1029 * Delete a destination server in the given service
1030 */
1031static int
1032ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_dest_user *udest)
1033{
1034 struct ip_vs_dest *dest;
1035 __be32 daddr = udest->addr;
1036 __be16 dport = udest->port;
1037
1038 EnterFunction(2);
1039
1040 dest = ip_vs_lookup_dest(svc, daddr, dport);
1041 if (dest == NULL) {
1042 IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
1043 return -ENOENT;
1044 }
1045
1046 write_lock_bh(&__ip_vs_svc_lock);
1047
1048 /*
1049 * Wait until all other svc users go away.
1050 */
1051 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1052
1053 /*
1054 * Unlink dest from the service
1055 */
1056 __ip_vs_unlink_dest(svc, dest, 1);
1057
1058 write_unlock_bh(&__ip_vs_svc_lock);
1059
1060 /*
1061 * Delete the destination
1062 */
1063 __ip_vs_del_dest(dest);
1064
1065 LeaveFunction(2);
1066
1067 return 0;
1068}
1069
1070
1071/*
1072 * Add a service into the service hash table
1073 */
1074static int
1075ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p)
1076{
1077 int ret = 0;
1078 struct ip_vs_scheduler *sched = NULL;
1079 struct ip_vs_service *svc = NULL;
1080
1081 /* increase the module use count */
1082 ip_vs_use_count_inc();
1083
1084 /* Lookup the scheduler by 'u->sched_name' */
1085 sched = ip_vs_scheduler_get(u->sched_name);
1086 if (sched == NULL) {
1087 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1088 u->sched_name);
1089 ret = -ENOENT;
1090 goto out_mod_dec;
1091 }
1092
1093 svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
1094 if (svc == NULL) {
1095 IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
1096 ret = -ENOMEM;
1097 goto out_err;
1098 }
1099
1100 /* I'm the first user of the service */
1101 atomic_set(&svc->usecnt, 1);
1102 atomic_set(&svc->refcnt, 0);
1103
1104 svc->protocol = u->protocol;
1105 svc->addr.ip = u->addr;
1106 svc->port = u->port;
1107 svc->fwmark = u->fwmark;
1108 svc->flags = u->flags;
1109 svc->timeout = u->timeout * HZ;
1110 svc->netmask = u->netmask;
1111
1112 INIT_LIST_HEAD(&svc->destinations);
1113 rwlock_init(&svc->sched_lock);
1114 spin_lock_init(&svc->stats.lock);
1115
1116 /* Bind the scheduler */
1117 ret = ip_vs_bind_scheduler(svc, sched);
1118 if (ret)
1119 goto out_err;
1120 sched = NULL;
1121
1122 /* Update the virtual service counters */
1123 if (svc->port == FTPPORT)
1124 atomic_inc(&ip_vs_ftpsvc_counter);
1125 else if (svc->port == 0)
1126 atomic_inc(&ip_vs_nullsvc_counter);
1127
1128 ip_vs_new_estimator(&svc->stats);
1129 ip_vs_num_services++;
1130
1131 /* Hash the service into the service table */
1132 write_lock_bh(&__ip_vs_svc_lock);
1133 ip_vs_svc_hash(svc);
1134 write_unlock_bh(&__ip_vs_svc_lock);
1135
1136 *svc_p = svc;
1137 return 0;
1138
1139 out_err:
1140 if (svc != NULL) {
1141 if (svc->scheduler)
1142 ip_vs_unbind_scheduler(svc);
1143 if (svc->inc) {
1144 local_bh_disable();
1145 ip_vs_app_inc_put(svc->inc);
1146 local_bh_enable();
1147 }
1148 kfree(svc);
1149 }
1150 ip_vs_scheduler_put(sched);
1151
1152 out_mod_dec:
1153 /* decrease the module use count */
1154 ip_vs_use_count_dec();
1155
1156 return ret;
1157}
1158
1159
1160/*
1161 * Edit a service and bind it with a new scheduler
1162 */
1163static int
1164ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user *u)
1165{
1166 struct ip_vs_scheduler *sched, *old_sched;
1167 int ret = 0;
1168
1169 /*
1170 * Lookup the scheduler, by 'u->sched_name'
1171 */
1172 sched = ip_vs_scheduler_get(u->sched_name);
1173 if (sched == NULL) {
1174 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1175 u->sched_name);
1176 return -ENOENT;
1177 }
1178 old_sched = sched;
1179
1180 write_lock_bh(&__ip_vs_svc_lock);
1181
1182 /*
1183 * Wait until all other svc users go away.
1184 */
1185 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1186
1187 /*
1188 * Set the flags and timeout value
1189 */
1190 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1191 svc->timeout = u->timeout * HZ;
1192 svc->netmask = u->netmask;
1193
1194 old_sched = svc->scheduler;
1195 if (sched != old_sched) {
1196 /*
1197 * Unbind the old scheduler
1198 */
1199 if ((ret = ip_vs_unbind_scheduler(svc))) {
1200 old_sched = sched;
1201 goto out;
1202 }
1203
1204 /*
1205 * Bind the new scheduler
1206 */
1207 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1208 /*
1209 * If ip_vs_bind_scheduler fails, restore the old
1210 * scheduler.
1211 * The main reason of failure is out of memory.
1212 *
1213 * The question is if the old scheduler can be
1214 * restored all the time. TODO: if it cannot be
1215 * restored some time, we must delete the service,
1216 * otherwise the system may crash.
1217 */
1218 ip_vs_bind_scheduler(svc, old_sched);
1219 old_sched = sched;
1220 goto out;
1221 }
1222 }
1223
1224 out:
1225 write_unlock_bh(&__ip_vs_svc_lock);
1226
1227 if (old_sched)
1228 ip_vs_scheduler_put(old_sched);
1229
1230 return ret;
1231}
1232
1233
1234/*
1235 * Delete a service from the service list
1236 * - The service must be unlinked, unlocked and not referenced!
1237 * - We are called under _bh lock
1238 */
1239static void __ip_vs_del_service(struct ip_vs_service *svc)
1240{
1241 struct ip_vs_dest *dest, *nxt;
1242 struct ip_vs_scheduler *old_sched;
1243
1244 ip_vs_num_services--;
1245 ip_vs_kill_estimator(&svc->stats);
1246
1247 /* Unbind scheduler */
1248 old_sched = svc->scheduler;
1249 ip_vs_unbind_scheduler(svc);
1250 if (old_sched)
1251 ip_vs_scheduler_put(old_sched);
1252
1253 /* Unbind app inc */
1254 if (svc->inc) {
1255 ip_vs_app_inc_put(svc->inc);
1256 svc->inc = NULL;
1257 }
1258
1259 /*
1260 * Unlink the whole destination list
1261 */
1262 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1263 __ip_vs_unlink_dest(svc, dest, 0);
1264 __ip_vs_del_dest(dest);
1265 }
1266
1267 /*
1268 * Update the virtual service counters
1269 */
1270 if (svc->port == FTPPORT)
1271 atomic_dec(&ip_vs_ftpsvc_counter);
1272 else if (svc->port == 0)
1273 atomic_dec(&ip_vs_nullsvc_counter);
1274
1275 /*
1276 * Free the service if nobody refers to it
1277 */
1278 if (atomic_read(&svc->refcnt) == 0)
1279 kfree(svc);
1280
1281 /* decrease the module use count */
1282 ip_vs_use_count_dec();
1283}
1284
1285/*
1286 * Delete a service from the service list
1287 */
1288static int ip_vs_del_service(struct ip_vs_service *svc)
1289{
1290 if (svc == NULL)
1291 return -EEXIST;
1292
1293 /*
1294 * Unhash it from the service table
1295 */
1296 write_lock_bh(&__ip_vs_svc_lock);
1297
1298 ip_vs_svc_unhash(svc);
1299
1300 /*
1301 * Wait until all the svc users go away.
1302 */
1303 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1304
1305 __ip_vs_del_service(svc);
1306
1307 write_unlock_bh(&__ip_vs_svc_lock);
1308
1309 return 0;
1310}
1311
1312
1313/*
1314 * Flush all the virtual services
1315 */
1316static int ip_vs_flush(void)
1317{
1318 int idx;
1319 struct ip_vs_service *svc, *nxt;
1320
1321 /*
1322 * Flush the service table hashed by <protocol,addr,port>
1323 */
1324 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1325 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1326 write_lock_bh(&__ip_vs_svc_lock);
1327 ip_vs_svc_unhash(svc);
1328 /*
1329 * Wait until all the svc users go away.
1330 */
1331 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1332 __ip_vs_del_service(svc);
1333 write_unlock_bh(&__ip_vs_svc_lock);
1334 }
1335 }
1336
1337 /*
1338 * Flush the service table hashed by fwmark
1339 */
1340 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1341 list_for_each_entry_safe(svc, nxt,
1342 &ip_vs_svc_fwm_table[idx], f_list) {
1343 write_lock_bh(&__ip_vs_svc_lock);
1344 ip_vs_svc_unhash(svc);
1345 /*
1346 * Wait until all the svc users go away.
1347 */
1348 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1349 __ip_vs_del_service(svc);
1350 write_unlock_bh(&__ip_vs_svc_lock);
1351 }
1352 }
1353
1354 return 0;
1355}
1356
1357
1358/*
1359 * Zero counters in a service or all services
1360 */
1361static int ip_vs_zero_service(struct ip_vs_service *svc)
1362{
1363 struct ip_vs_dest *dest;
1364
1365 write_lock_bh(&__ip_vs_svc_lock);
1366 list_for_each_entry(dest, &svc->destinations, n_list) {
1367 ip_vs_zero_stats(&dest->stats);
1368 }
1369 ip_vs_zero_stats(&svc->stats);
1370 write_unlock_bh(&__ip_vs_svc_lock);
1371 return 0;
1372}
1373
1374static int ip_vs_zero_all(void)
1375{
1376 int idx;
1377 struct ip_vs_service *svc;
1378
1379 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1380 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1381 ip_vs_zero_service(svc);
1382 }
1383 }
1384
1385 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1386 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1387 ip_vs_zero_service(svc);
1388 }
1389 }
1390
1391 ip_vs_zero_stats(&ip_vs_stats);
1392 return 0;
1393}
1394
1395
1396static int
1397proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
1398 void __user *buffer, size_t *lenp, loff_t *ppos)
1399{
1400 int *valp = table->data;
1401 int val = *valp;
1402 int rc;
1403
1404 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1405 if (write && (*valp != val)) {
1406 if ((*valp < 0) || (*valp > 3)) {
1407 /* Restore the correct value */
1408 *valp = val;
1409 } else {
1410 update_defense_level();
1411 }
1412 }
1413 return rc;
1414}
1415
1416
1417static int
1418proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
1419 void __user *buffer, size_t *lenp, loff_t *ppos)
1420{
1421 int *valp = table->data;
1422 int val[2];
1423 int rc;
1424
1425 /* backup the value first */
1426 memcpy(val, valp, sizeof(val));
1427
1428 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1429 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1430 /* Restore the correct value */
1431 memcpy(valp, val, sizeof(val));
1432 }
1433 return rc;
1434}
1435
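For example, starting from the default sysctl_ip_vs_sync_threshold of { 3, 50 }, writing "10 100" to /proc/sys/net/ipv4/vs/sync_threshold is accepted, while "50 3" (or any pair with a negative value, or with the first value not strictly below the second) fails the check above and the previous pair is silently restored.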
1436
1437/*
1438 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1439 */
1440
1441static struct ctl_table vs_vars[] = {
1442 {
1443 .procname = "amemthresh",
1444 .data = &sysctl_ip_vs_amemthresh,
1445 .maxlen = sizeof(int),
1446 .mode = 0644,
1447 .proc_handler = &proc_dointvec,
1448 },
1449#ifdef CONFIG_IP_VS_DEBUG
1450 {
1451 .procname = "debug_level",
1452 .data = &sysctl_ip_vs_debug_level,
1453 .maxlen = sizeof(int),
1454 .mode = 0644,
1455 .proc_handler = &proc_dointvec,
1456 },
1457#endif
1458 {
1459 .procname = "am_droprate",
1460 .data = &sysctl_ip_vs_am_droprate,
1461 .maxlen = sizeof(int),
1462 .mode = 0644,
1463 .proc_handler = &proc_dointvec,
1464 },
1465 {
1466 .procname = "drop_entry",
1467 .data = &sysctl_ip_vs_drop_entry,
1468 .maxlen = sizeof(int),
1469 .mode = 0644,
1470 .proc_handler = &proc_do_defense_mode,
1471 },
1472 {
1473 .procname = "drop_packet",
1474 .data = &sysctl_ip_vs_drop_packet,
1475 .maxlen = sizeof(int),
1476 .mode = 0644,
1477 .proc_handler = &proc_do_defense_mode,
1478 },
1479 {
1480 .procname = "secure_tcp",
1481 .data = &sysctl_ip_vs_secure_tcp,
1482 .maxlen = sizeof(int),
1483 .mode = 0644,
1484 .proc_handler = &proc_do_defense_mode,
1485 },
1486#if 0
1487 {
1488 .procname = "timeout_established",
1489 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1490 .maxlen = sizeof(int),
1491 .mode = 0644,
1492 .proc_handler = &proc_dointvec_jiffies,
1493 },
1494 {
1495 .procname = "timeout_synsent",
1496 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1497 .maxlen = sizeof(int),
1498 .mode = 0644,
1499 .proc_handler = &proc_dointvec_jiffies,
1500 },
1501 {
1502 .procname = "timeout_synrecv",
1503 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1504 .maxlen = sizeof(int),
1505 .mode = 0644,
1506 .proc_handler = &proc_dointvec_jiffies,
1507 },
1508 {
1509 .procname = "timeout_finwait",
1510 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1511 .maxlen = sizeof(int),
1512 .mode = 0644,
1513 .proc_handler = &proc_dointvec_jiffies,
1514 },
1515 {
1516 .procname = "timeout_timewait",
1517 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1518 .maxlen = sizeof(int),
1519 .mode = 0644,
1520 .proc_handler = &proc_dointvec_jiffies,
1521 },
1522 {
1523 .procname = "timeout_close",
1524 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1525 .maxlen = sizeof(int),
1526 .mode = 0644,
1527 .proc_handler = &proc_dointvec_jiffies,
1528 },
1529 {
1530 .procname = "timeout_closewait",
1531 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1532 .maxlen = sizeof(int),
1533 .mode = 0644,
1534 .proc_handler = &proc_dointvec_jiffies,
1535 },
1536 {
1537 .procname = "timeout_lastack",
1538 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1539 .maxlen = sizeof(int),
1540 .mode = 0644,
1541 .proc_handler = &proc_dointvec_jiffies,
1542 },
1543 {
1544 .procname = "timeout_listen",
1545 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1546 .maxlen = sizeof(int),
1547 .mode = 0644,
1548 .proc_handler = &proc_dointvec_jiffies,
1549 },
1550 {
1551 .procname = "timeout_synack",
1552 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1553 .maxlen = sizeof(int),
1554 .mode = 0644,
1555 .proc_handler = &proc_dointvec_jiffies,
1556 },
1557 {
1558 .procname = "timeout_udp",
1559 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1560 .maxlen = sizeof(int),
1561 .mode = 0644,
1562 .proc_handler = &proc_dointvec_jiffies,
1563 },
1564 {
1565 .procname = "timeout_icmp",
1566 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1567 .maxlen = sizeof(int),
1568 .mode = 0644,
1569 .proc_handler = &proc_dointvec_jiffies,
1570 },
1571#endif
1572 {
1573 .procname = "cache_bypass",
1574 .data = &sysctl_ip_vs_cache_bypass,
1575 .maxlen = sizeof(int),
1576 .mode = 0644,
1577 .proc_handler = &proc_dointvec,
1578 },
1579 {
1580 .procname = "expire_nodest_conn",
1581 .data = &sysctl_ip_vs_expire_nodest_conn,
1582 .maxlen = sizeof(int),
1583 .mode = 0644,
1584 .proc_handler = &proc_dointvec,
1585 },
1586 {
1587 .procname = "expire_quiescent_template",
1588 .data = &sysctl_ip_vs_expire_quiescent_template,
1589 .maxlen = sizeof(int),
1590 .mode = 0644,
1591 .proc_handler = &proc_dointvec,
1592 },
1593 {
1594 .procname = "sync_threshold",
1595 .data = &sysctl_ip_vs_sync_threshold,
1596 .maxlen = sizeof(sysctl_ip_vs_sync_threshold),
1597 .mode = 0644,
1598 .proc_handler = &proc_do_sync_threshold,
1599 },
1600 {
1601 .procname = "nat_icmp_send",
1602 .data = &sysctl_ip_vs_nat_icmp_send,
1603 .maxlen = sizeof(int),
1604 .mode = 0644,
1605 .proc_handler = &proc_dointvec,
1606 },
1607 { .ctl_name = 0 }
1608};
1609
1610const struct ctl_path net_vs_ctl_path[] = {
1611 { .procname = "net", .ctl_name = CTL_NET, },
1612 { .procname = "ipv4", .ctl_name = NET_IPV4, },
1613 { .procname = "vs", },
1614 { }
1615};
1616EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1617
1618static struct ctl_table_header * sysctl_header;
1619
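How vs_vars gets hooked under this path is not visible in this excerpt; in kernels of this vintage the control-interface init code later in the file presumably does something along the lines of sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars), keeping the returned header in sysctl_header so the table can be unregistered on module exit.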
1620#ifdef CONFIG_PROC_FS
1621
1622struct ip_vs_iter {
1623 struct list_head *table;
1624 int bucket;
1625};
1626
1627/*
1628 * Write the contents of the VS rule table to a PROCfs file.
1629 * (It is kept just for backward compatibility)
1630 */
1631static inline const char *ip_vs_fwd_name(unsigned flags)
1632{
1633 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1634 case IP_VS_CONN_F_LOCALNODE:
1635 return "Local";
1636 case IP_VS_CONN_F_TUNNEL:
1637 return "Tunnel";
1638 case IP_VS_CONN_F_DROUTE:
1639 return "Route";
1640 default:
1641 return "Masq";
1642 }
1643}
1644
1645
1646/* Get the Nth entry in the two lists */
1647static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1648{
1649 struct ip_vs_iter *iter = seq->private;
1650 int idx;
1651 struct ip_vs_service *svc;
1652
1653 /* look in hash by protocol */
1654 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1655 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1656 if (pos-- == 0){
1657 iter->table = ip_vs_svc_table;
1658 iter->bucket = idx;
1659 return svc;
1660 }
1661 }
1662 }
1663
1664 /* keep looking in fwmark */
1665 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1666 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1667 if (pos-- == 0) {
1668 iter->table = ip_vs_svc_fwm_table;
1669 iter->bucket = idx;
1670 return svc;
1671 }
1672 }
1673 }
1674
1675 return NULL;
1676}
1677
1678static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1679{
1680
1681 read_lock_bh(&__ip_vs_svc_lock);
1682 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1683}
1684
1685
1686static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1687{
1688 struct list_head *e;
1689 struct ip_vs_iter *iter;
1690 struct ip_vs_service *svc;
1691
1692 ++*pos;
1693 if (v == SEQ_START_TOKEN)
1694 return ip_vs_info_array(seq,0);
1695
1696 svc = v;
1697 iter = seq->private;
1698
1699 if (iter->table == ip_vs_svc_table) {
1700 /* next service in table hashed by protocol */
1701 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1702 return list_entry(e, struct ip_vs_service, s_list);
1703
1704
1705 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1706 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1707 s_list) {
1708 return svc;
1709 }
1710 }
1711
1712 iter->table = ip_vs_svc_fwm_table;
1713 iter->bucket = -1;
1714 goto scan_fwmark;
1715 }
1716
1717 /* next service in hashed by fwmark */
1718 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1719 return list_entry(e, struct ip_vs_service, f_list);
1720
1721 scan_fwmark:
1722 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1723 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1724 f_list)
1725 return svc;
1726 }
1727
1728 return NULL;
1729}
1730
1731static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1732{
1733 read_unlock_bh(&__ip_vs_svc_lock);
1734}
1735
1736
1737static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1738{
1739 if (v == SEQ_START_TOKEN) {
1740 seq_printf(seq,
1741 "IP Virtual Server version %d.%d.%d (size=%d)\n",
1742 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
1743 seq_puts(seq,
1744 "Prot LocalAddress:Port Scheduler Flags\n");
1745 seq_puts(seq,
1746 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1747 } else {
1748 const struct ip_vs_service *svc = v;
1749 const struct ip_vs_iter *iter = seq->private;
1750 const struct ip_vs_dest *dest;
1751
1752 if (iter->table == ip_vs_svc_table)
1753 seq_printf(seq, "%s %08X:%04X %s ",
1754 ip_vs_proto_name(svc->protocol),
1755 ntohl(svc->addr.ip),
1756 ntohs(svc->port),
1757 svc->scheduler->name);
1758 else
1759 seq_printf(seq, "FWM %08X %s ",
1760 svc->fwmark, svc->scheduler->name);
1761
1762 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1763 seq_printf(seq, "persistent %d %08X\n",
1764 svc->timeout,
1765 ntohl(svc->netmask));
1766 else
1767 seq_putc(seq, '\n');
1768
1769 list_for_each_entry(dest, &svc->destinations, n_list) {
1770 seq_printf(seq,
1771 " -> %08X:%04X %-7s %-6d %-10d %-10d\n",
1772 ntohl(dest->addr.ip), ntohs(dest->port),
1773 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1774 atomic_read(&dest->weight),
1775 atomic_read(&dest->activeconns),
1776 atomic_read(&dest->inactconns));
1777 }
1778 }
1779 return 0;
1780}
1781
1782static const struct seq_operations ip_vs_info_seq_ops = {
1783 .start = ip_vs_info_seq_start,
1784 .next = ip_vs_info_seq_next,
1785 .stop = ip_vs_info_seq_stop,
1786 .show = ip_vs_info_seq_show,
1787};
1788
1789static int ip_vs_info_open(struct inode *inode, struct file *file)
1790{
1791 return seq_open_private(file, &ip_vs_info_seq_ops,
1792 sizeof(struct ip_vs_iter));
1793}
1794
1795static const struct file_operations ip_vs_info_fops = {
1796 .owner = THIS_MODULE,
1797 .open = ip_vs_info_open,
1798 .read = seq_read,
1799 .llseek = seq_lseek,
1800 .release = seq_release_private,
1801};
1802
1803#endif
1804
1805struct ip_vs_stats ip_vs_stats = {
1806 .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
1807};
1808
1809#ifdef CONFIG_PROC_FS
1810static int ip_vs_stats_show(struct seq_file *seq, void *v)
1811{
1812
1813/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1814 seq_puts(seq,
1815 " Total Incoming Outgoing Incoming Outgoing\n");
1816 seq_printf(seq,
1817 " Conns Packets Packets Bytes Bytes\n");
1818
1819 spin_lock_bh(&ip_vs_stats.lock);
1820 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.conns,
1821 ip_vs_stats.inpkts, ip_vs_stats.outpkts,
1822 (unsigned long long) ip_vs_stats.inbytes,
1823 (unsigned long long) ip_vs_stats.outbytes);
1824
1825/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1826 seq_puts(seq,
1827 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1828 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
1829 ip_vs_stats.cps,
1830 ip_vs_stats.inpps,
1831 ip_vs_stats.outpps,
1832 ip_vs_stats.inbps,
1833 ip_vs_stats.outbps);
1834 spin_unlock_bh(&ip_vs_stats.lock);
1835
1836 return 0;
1837}
1838
1839static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1840{
1841 return single_open(file, ip_vs_stats_show, NULL);
1842}
1843
1844static const struct file_operations ip_vs_stats_fops = {
1845 .owner = THIS_MODULE,
1846 .open = ip_vs_stats_seq_open,
1847 .read = seq_read,
1848 .llseek = seq_lseek,
1849 .release = single_release,
1850};
1851
1852#endif
1853
1854/*
1855 * Set timeout values for tcp tcpfin udp in the timeout_table.
1856 */
1857static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
1858{
1859 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
1860 u->tcp_timeout,
1861 u->tcp_fin_timeout,
1862 u->udp_timeout);
1863
1864#ifdef CONFIG_IP_VS_PROTO_TCP
1865 if (u->tcp_timeout) {
1866 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
1867 = u->tcp_timeout * HZ;
1868 }
1869
1870 if (u->tcp_fin_timeout) {
1871 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
1872 = u->tcp_fin_timeout * HZ;
1873 }
1874#endif
1875
1876#ifdef CONFIG_IP_VS_PROTO_UDP
1877 if (u->udp_timeout) {
1878 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
1879 = u->udp_timeout * HZ;
1880 }
1881#endif
1882 return 0;
1883}
1884
1885
1886#define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
1887#define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user))
1888#define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \
1889 sizeof(struct ip_vs_dest_user))
1890#define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
1891#define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user))
1892#define MAX_ARG_LEN SVCDEST_ARG_LEN
1893
1894static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
1895 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
1896 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
1897 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
1898 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
1899 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
1900 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
1901 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
1902 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
1903 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
1904 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
1905 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
1906};
1907
1908static int
1909do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1910{
1911 int ret;
1912 unsigned char arg[MAX_ARG_LEN];
1913 struct ip_vs_service_user *usvc;
1914 struct ip_vs_service *svc;
1915 struct ip_vs_dest_user *udest;
1916
1917 if (!capable(CAP_NET_ADMIN))
1918 return -EPERM;
1919
1920 if (len != set_arglen[SET_CMDID(cmd)]) {
1921 IP_VS_ERR("set_ctl: len %u != %u\n",
1922 len, set_arglen[SET_CMDID(cmd)]);
1923 return -EINVAL;
1924 }
1925
1926 if (copy_from_user(arg, user, len) != 0)
1927 return -EFAULT;
1928
1929 /* increase the module use count */
1930 ip_vs_use_count_inc();
1931
1932 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
1933 ret = -ERESTARTSYS;
1934 goto out_dec;
1935 }
1936
1937 if (cmd == IP_VS_SO_SET_FLUSH) {
1938 /* Flush the virtual service */
1939 ret = ip_vs_flush();
1940 goto out_unlock;
1941 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
1942 /* Set timeout values for (tcp tcpfin udp) */
1943 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
1944 goto out_unlock;
1945 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
1946 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
1947 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
1948 goto out_unlock;
1949 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
1950 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
1951 ret = stop_sync_thread(dm->state);
1952 goto out_unlock;
1953 }
1954
1955 usvc = (struct ip_vs_service_user *)arg;
1956 udest = (struct ip_vs_dest_user *)(usvc + 1);
1957
1958 if (cmd == IP_VS_SO_SET_ZERO) {
1959 /* if no service address is set, zero counters in all */
1960 if (!usvc->fwmark && !usvc->addr && !usvc->port) {
1961 ret = ip_vs_zero_all();
1962 goto out_unlock;
1963 }
1964 }
1965
1966 /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
1967 if (usvc->protocol!=IPPROTO_TCP && usvc->protocol!=IPPROTO_UDP) {
1968 IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
1969 usvc->protocol, NIPQUAD(usvc->addr),
1970 ntohs(usvc->port), usvc->sched_name);
1971 ret = -EFAULT;
1972 goto out_unlock;
1973 }
1974
1975 /* Lookup the exact service by <protocol, addr, port> or fwmark */
1976 if (usvc->fwmark == 0)
1977 svc = __ip_vs_service_get(usvc->protocol,
1978 usvc->addr, usvc->port);
1979 else
1980 svc = __ip_vs_svc_fwm_get(usvc->fwmark);
1981
1982 if (cmd != IP_VS_SO_SET_ADD
1983 && (svc == NULL || svc->protocol != usvc->protocol)) {
1984 ret = -ESRCH;
1985 goto out_unlock;
1986 }
1987
1988 switch (cmd) {
1989 case IP_VS_SO_SET_ADD:
1990 if (svc != NULL)
1991 ret = -EEXIST;
1992 else
1993 ret = ip_vs_add_service(usvc, &svc);
1994 break;
1995 case IP_VS_SO_SET_EDIT:
1996 ret = ip_vs_edit_service(svc, usvc);
1997 break;
1998 case IP_VS_SO_SET_DEL:
1999 ret = ip_vs_del_service(svc);
2000 if (!ret)
2001 goto out_unlock;
2002 break;
2003 case IP_VS_SO_SET_ZERO:
2004 ret = ip_vs_zero_service(svc);
2005 break;
2006 case IP_VS_SO_SET_ADDDEST:
2007 ret = ip_vs_add_dest(svc, udest);
2008 break;
2009 case IP_VS_SO_SET_EDITDEST:
2010 ret = ip_vs_edit_dest(svc, udest);
2011 break;
2012 case IP_VS_SO_SET_DELDEST:
2013 ret = ip_vs_del_dest(svc, udest);
2014 break;
2015 default:
2016 ret = -EINVAL;
2017 }
2018
2019 if (svc)
2020 ip_vs_service_put(svc);
2021
2022 out_unlock:
2023 mutex_unlock(&__ip_vs_mutex);
2024 out_dec:
2025 /* decrease the module use count */
2026 ip_vs_use_count_dec();
2027
2028 return ret;
2029}
2030
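For context, do_ip_vs_set_ctl() above is reached from userspace via setsockopt() on an ordinary IPv4 socket; this is how ipvsadm drives the table-editing commands sized by set_arglen[]. A minimal sketch of the argument-less FLUSH command follows (illustrative only, not part of this file; it assumes the IP_VS_SO_* constants are visible to userspace, e.g. from a modern <linux/ip_vs.h>, and it needs CAP_NET_ADMIN to succeed):

#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/ip_vs.h>	/* assumed location of IP_VS_SO_SET_FLUSH */

int main(void)
{
	/* IPVS sockopts are attached to plain IPv4 sockets at level IPPROTO_IP. */
	int fd = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);

	if (fd < 0) {
		perror("socket");
		return 1;
	}
	/* IP_VS_SO_SET_FLUSH carries no argument, so len must be 0 (see set_arglen[]). */
	if (setsockopt(fd, IPPROTO_IP, IP_VS_SO_SET_FLUSH, NULL, 0) < 0)
		perror("setsockopt(IP_VS_SO_SET_FLUSH)");
	return 0;
}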
2031
2032static void
2033ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2034{
2035 spin_lock_bh(&src->lock);
2036 memcpy(dst, src, (char*)&src->lock - (char*)src);
2037 spin_unlock_bh(&src->lock);
2038}
2039
2040static void
2041ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2042{
2043 dst->protocol = src->protocol;
2044 dst->addr = src->addr.ip;
2045 dst->port = src->port;
2046 dst->fwmark = src->fwmark;
2047 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2048 dst->flags = src->flags;
2049 dst->timeout = src->timeout / HZ;
2050 dst->netmask = src->netmask;
2051 dst->num_dests = src->num_dests;
2052 ip_vs_copy_stats(&dst->stats, &src->stats);
2053}
2054
2055static inline int
2056__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2057 struct ip_vs_get_services __user *uptr)
2058{
2059 int idx, count=0;
2060 struct ip_vs_service *svc;
2061 struct ip_vs_service_entry entry;
2062 int ret = 0;
2063
2064 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2065 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2066 if (count >= get->num_services)
2067 goto out;
2068 memset(&entry, 0, sizeof(entry));
2069 ip_vs_copy_service(&entry, svc);
2070 if (copy_to_user(&uptr->entrytable[count],
2071 &entry, sizeof(entry))) {
2072 ret = -EFAULT;
2073 goto out;
2074 }
2075 count++;
2076 }
2077 }
2078
2079 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2080 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2081 if (count >= get->num_services)
2082 goto out;
2083 memset(&entry, 0, sizeof(entry));
2084 ip_vs_copy_service(&entry, svc);
2085 if (copy_to_user(&uptr->entrytable[count],
2086 &entry, sizeof(entry))) {
2087 ret = -EFAULT;
2088 goto out;
2089 }
2090 count++;
2091 }
2092 }
2093 out:
2094 return ret;
2095}
2096
2097static inline int
2098__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2099 struct ip_vs_get_dests __user *uptr)
2100{
2101 struct ip_vs_service *svc;
2102 int ret = 0;
2103
2104 if (get->fwmark)
2105 svc = __ip_vs_svc_fwm_get(get->fwmark);
2106 else
2107 svc = __ip_vs_service_get(get->protocol,
2108 get->addr, get->port);
2109 if (svc) {
2110 int count = 0;
2111 struct ip_vs_dest *dest;
2112 struct ip_vs_dest_entry entry;
2113
2114 list_for_each_entry(dest, &svc->destinations, n_list) {
2115 if (count >= get->num_dests)
2116 break;
2117
e7ade46a 2118 entry.addr = dest->addr.ip;
2119 entry.port = dest->port;
2120 entry.conn_flags = atomic_read(&dest->conn_flags);
2121 entry.weight = atomic_read(&dest->weight);
2122 entry.u_threshold = dest->u_threshold;
2123 entry.l_threshold = dest->l_threshold;
2124 entry.activeconns = atomic_read(&dest->activeconns);
2125 entry.inactconns = atomic_read(&dest->inactconns);
2126 entry.persistconns = atomic_read(&dest->persistconns);
2127 ip_vs_copy_stats(&entry.stats, &dest->stats);
2128 if (copy_to_user(&uptr->entrytable[count],
2129 &entry, sizeof(entry))) {
2130 ret = -EFAULT;
2131 break;
2132 }
2133 count++;
2134 }
2135 ip_vs_service_put(svc);
2136 } else
2137 ret = -ESRCH;
2138 return ret;
2139}
2140
2141static inline void
2142__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2143{
2144#ifdef CONFIG_IP_VS_PROTO_TCP
2145 u->tcp_timeout =
2146 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2147 u->tcp_fin_timeout =
2148 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2149#endif
2150#ifdef CONFIG_IP_VS_PROTO_UDP
2151 u->udp_timeout =
2152 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2153#endif
2154}
2155
2156
2157#define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2158#define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2159#define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2160#define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2161#define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2162#define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2163#define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2164
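/* Minimum userspace argument length for each IP_VS_SO_GET_* command,
 * indexed by GET_CMDID(); do_ip_vs_get_ctl() rejects shorter buffers
 * with -EINVAL. */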
9b5b5cff 2165static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
2166 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2167 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2168 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2169 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2170 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2171 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2172 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2173};
2174
2175static int
2176do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2177{
2178 unsigned char arg[128];
2179 int ret = 0;
2180
2181 if (!capable(CAP_NET_ADMIN))
2182 return -EPERM;
2183
2184 if (*len < get_arglen[GET_CMDID(cmd)]) {
2185 IP_VS_ERR("get_ctl: len %u < %u\n",
2186 *len, get_arglen[GET_CMDID(cmd)]);
2187 return -EINVAL;
2188 }
2189
2190 if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0)
2191 return -EFAULT;
2192
14cc3e2b 2193 if (mutex_lock_interruptible(&__ip_vs_mutex))
2194 return -ERESTARTSYS;
2195
2196 switch (cmd) {
2197 case IP_VS_SO_GET_VERSION:
2198 {
2199 char buf[64];
2200
2201 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2202 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
2203 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2204 ret = -EFAULT;
2205 goto out;
2206 }
2207 *len = strlen(buf)+1;
2208 }
2209 break;
2210
2211 case IP_VS_SO_GET_INFO:
2212 {
2213 struct ip_vs_getinfo info;
2214 info.version = IP_VS_VERSION_CODE;
2215 info.size = IP_VS_CONN_TAB_SIZE;
2216 info.num_services = ip_vs_num_services;
2217 if (copy_to_user(user, &info, sizeof(info)) != 0)
2218 ret = -EFAULT;
2219 }
2220 break;
2221
2222 case IP_VS_SO_GET_SERVICES:
2223 {
2224 struct ip_vs_get_services *get;
2225 int size;
2226
2227 get = (struct ip_vs_get_services *)arg;
2228 size = sizeof(*get) +
2229 sizeof(struct ip_vs_service_entry) * get->num_services;
2230 if (*len != size) {
2231 IP_VS_ERR("length: %u != %u\n", *len, size);
2232 ret = -EINVAL;
2233 goto out;
2234 }
2235 ret = __ip_vs_get_service_entries(get, user);
2236 }
2237 break;
2238
2239 case IP_VS_SO_GET_SERVICE:
2240 {
2241 struct ip_vs_service_entry *entry;
2242 struct ip_vs_service *svc;
2243
2244 entry = (struct ip_vs_service_entry *)arg;
2245 if (entry->fwmark)
2246 svc = __ip_vs_svc_fwm_get(entry->fwmark);
2247 else
2248 svc = __ip_vs_service_get(entry->protocol,
2249 entry->addr, entry->port);
2250 if (svc) {
2251 ip_vs_copy_service(entry, svc);
2252 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2253 ret = -EFAULT;
2254 ip_vs_service_put(svc);
2255 } else
2256 ret = -ESRCH;
2257 }
2258 break;
2259
2260 case IP_VS_SO_GET_DESTS:
2261 {
2262 struct ip_vs_get_dests *get;
2263 int size;
2264
2265 get = (struct ip_vs_get_dests *)arg;
2266 size = sizeof(*get) +
2267 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2268 if (*len != size) {
2269 IP_VS_ERR("length: %u != %u\n", *len, size);
2270 ret = -EINVAL;
2271 goto out;
2272 }
2273 ret = __ip_vs_get_dest_entries(get, user);
2274 }
2275 break;
2276
2277 case IP_VS_SO_GET_TIMEOUT:
2278 {
2279 struct ip_vs_timeout_user t;
2280
2281 __ip_vs_get_timeouts(&t);
2282 if (copy_to_user(user, &t, sizeof(t)) != 0)
2283 ret = -EFAULT;
2284 }
2285 break;
2286
2287 case IP_VS_SO_GET_DAEMON:
2288 {
2289 struct ip_vs_daemon_user d[2];
2290
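		/* Report two fixed slots: d[0] for the master sync daemon,
		 * d[1] for the backup; a slot left zeroed means that daemon
		 * is not running. */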
2291 memset(&d, 0, sizeof(d));
2292 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2293 d[0].state = IP_VS_STATE_MASTER;
4da62fc7 2294 strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
2295 d[0].syncid = ip_vs_master_syncid;
2296 }
2297 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2298 d[1].state = IP_VS_STATE_BACKUP;
4da62fc7 2299 strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
2300 d[1].syncid = ip_vs_backup_syncid;
2301 }
2302 if (copy_to_user(user, &d, sizeof(d)) != 0)
2303 ret = -EFAULT;
2304 }
2305 break;
2306
2307 default:
2308 ret = -EINVAL;
2309 }
2310
2311 out:
14cc3e2b 2312 mutex_unlock(&__ip_vs_mutex);
2313 return ret;
2314}
2315
2316
2317static struct nf_sockopt_ops ip_vs_sockopts = {
2318 .pf = PF_INET,
2319 .set_optmin = IP_VS_BASE_CTL,
2320 .set_optmax = IP_VS_SO_SET_MAX+1,
2321 .set = do_ip_vs_set_ctl,
2322 .get_optmin = IP_VS_BASE_CTL,
2323 .get_optmax = IP_VS_SO_GET_MAX+1,
2324 .get = do_ip_vs_get_ctl,
16fcec35 2325 .owner = THIS_MODULE,
2326};
2327
2328/*
2329 * Generic Netlink interface
2330 */
2331
2332/* IPVS genetlink family */
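/* GENL_ID_GENERATE lets the generic netlink core assign a free family id
 * when genl_register_family() runs in ip_vs_genl_register(). */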
2333static struct genl_family ip_vs_genl_family = {
2334 .id = GENL_ID_GENERATE,
2335 .hdrsize = 0,
2336 .name = IPVS_GENL_NAME,
2337 .version = IPVS_GENL_VERSION,
2338 .maxattr = IPVS_CMD_MAX,
2339};
2340
2341/* Policy used for first-level command attributes */
2342static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2343 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2344 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2345 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2346 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2347 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2348 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2349};
2350
2351/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2352static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2353 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2354 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2355 .len = IP_VS_IFNAME_MAXLEN },
2356 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2357};
2358
2359/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2360static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2361 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2362 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2363 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2364 .len = sizeof(union nf_inet_addr) },
2365 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2366 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2367 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2368 .len = IP_VS_SCHEDNAME_MAXLEN },
2369 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2370 .len = sizeof(struct ip_vs_flags) },
2371 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2372 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2373 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2374};
2375
2376/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2377static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2378 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2379 .len = sizeof(union nf_inet_addr) },
2380 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2381 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2382 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2383 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2384 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2385 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2386 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2387 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2388 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
2389};
2390
2391static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2392 struct ip_vs_stats *stats)
2393{
2394 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2395 if (!nl_stats)
2396 return -EMSGSIZE;
2397
2398 spin_lock_bh(&stats->lock);
2399
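	/* The NLA_PUT* macros jump to nla_put_failure if the message runs
	 * out of room, which is why that path below also drops the stats
	 * lock. */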
2400 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->conns);
2401 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->inpkts);
2402 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->outpkts);
2403 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->inbytes);
2404 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->outbytes);
2405 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->cps);
2406 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->inpps);
2407 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->outpps);
2408 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->inbps);
2409 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->outbps);
2410
2411 spin_unlock_bh(&stats->lock);
2412
2413 nla_nest_end(skb, nl_stats);
2414
2415 return 0;
2416
2417nla_put_failure:
2418 spin_unlock_bh(&stats->lock);
2419 nla_nest_cancel(skb, nl_stats);
2420 return -EMSGSIZE;
2421}
2422
2423static int ip_vs_genl_fill_service(struct sk_buff *skb,
2424 struct ip_vs_service *svc)
2425{
2426 struct nlattr *nl_service;
2427 struct ip_vs_flags flags = { .flags = svc->flags,
2428 .mask = ~0 };
2429
2430 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2431 if (!nl_service)
2432 return -EMSGSIZE;
2433
2434 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, AF_INET);
2435
2436 if (svc->fwmark) {
2437 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2438 } else {
2439 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2440 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2441 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2442 }
2443
2444 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2445 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2446 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2447 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2448
2449 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2450 goto nla_put_failure;
2451
2452 nla_nest_end(skb, nl_service);
2453
2454 return 0;
2455
2456nla_put_failure:
2457 nla_nest_cancel(skb, nl_service);
2458 return -EMSGSIZE;
2459}
2460
2461static int ip_vs_genl_dump_service(struct sk_buff *skb,
2462 struct ip_vs_service *svc,
2463 struct netlink_callback *cb)
2464{
2465 void *hdr;
2466
2467 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2468 &ip_vs_genl_family, NLM_F_MULTI,
2469 IPVS_CMD_NEW_SERVICE);
2470 if (!hdr)
2471 return -EMSGSIZE;
2472
2473 if (ip_vs_genl_fill_service(skb, svc) < 0)
2474 goto nla_put_failure;
2475
2476 return genlmsg_end(skb, hdr);
2477
2478nla_put_failure:
2479 genlmsg_cancel(skb, hdr);
2480 return -EMSGSIZE;
2481}
2482
2483static int ip_vs_genl_dump_services(struct sk_buff *skb,
2484 struct netlink_callback *cb)
2485{
2486 int idx = 0, i;
2487 int start = cb->args[0];
2488 struct ip_vs_service *svc;
2489
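	/* Dumps may span several netlink callbacks; cb->args[0] records how
	 * many services were already emitted so the next pass resumes after
	 * them. */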
2490 mutex_lock(&__ip_vs_mutex);
2491 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2492 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2493 if (++idx <= start)
2494 continue;
2495 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2496 idx--;
2497 goto nla_put_failure;
2498 }
2499 }
2500 }
2501
2502 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2503 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2504 if (++idx <= start)
2505 continue;
2506 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2507 idx--;
2508 goto nla_put_failure;
2509 }
2510 }
2511 }
2512
2513nla_put_failure:
2514 mutex_unlock(&__ip_vs_mutex);
2515 cb->args[0] = idx;
2516
2517 return skb->len;
2518}
2519
2520static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc,
2521 struct nlattr *nla, int full_entry)
2522{
2523 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2524 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2525
2526 /* Parse mandatory identifying service fields first */
2527 if (nla == NULL ||
2528 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2529 return -EINVAL;
2530
2531 nla_af = attrs[IPVS_SVC_ATTR_AF];
2532 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2533 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2534 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2535 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2536
2537 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2538 return -EINVAL;
2539
2540 /* For now, only support IPv4 */
2541 if (nla_get_u16(nla_af) != AF_INET)
2542 return -EAFNOSUPPORT;
2543
2544 if (nla_fwmark) {
2545 usvc->protocol = IPPROTO_TCP;
2546 usvc->fwmark = nla_get_u32(nla_fwmark);
2547 } else {
2548 usvc->protocol = nla_get_u16(nla_protocol);
2549 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2550 usvc->port = nla_get_u16(nla_port);
2551 usvc->fwmark = 0;
2552 }
2553
2554 /* If a full entry was requested, check for the additional fields */
2555 if (full_entry) {
2556 struct nlattr *nla_sched, *nla_flags, *nla_timeout,
2557 *nla_netmask;
2558 struct ip_vs_flags flags;
2559 struct ip_vs_service *svc;
2560
2561 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2562 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2563 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2564 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2565
2566 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2567 return -EINVAL;
2568
2569 nla_memcpy(&flags, nla_flags, sizeof(flags));
2570
2571 /* prefill flags from service if it already exists */
2572 if (usvc->fwmark)
2573 svc = __ip_vs_svc_fwm_get(usvc->fwmark);
2574 else
2575 svc = __ip_vs_service_get(usvc->protocol, usvc->addr,
2576 usvc->port);
2577 if (svc) {
2578 usvc->flags = svc->flags;
2579 ip_vs_service_put(svc);
2580 } else
2581 usvc->flags = 0;
2582
2583 /* set new flags from userland */
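		/* Only bits selected by flags.mask come from userspace; the
		 * remaining bits keep the values prefilled above. */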
2584 usvc->flags = (usvc->flags & ~flags.mask) |
2585 (flags.flags & flags.mask);
2586
2587 strlcpy(usvc->sched_name, nla_data(nla_sched),
2588 sizeof(usvc->sched_name));
2589 usvc->timeout = nla_get_u32(nla_timeout);
2590 usvc->netmask = nla_get_u32(nla_netmask);
2591 }
2592
2593 return 0;
2594}
2595
2596static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2597{
2598 struct ip_vs_service_user usvc;
2599 int ret;
2600
2601 ret = ip_vs_genl_parse_service(&usvc, nla, 0);
2602 if (ret)
2603 return ERR_PTR(ret);
2604
2605 if (usvc.fwmark)
2606 return __ip_vs_svc_fwm_get(usvc.fwmark);
2607 else
2608 return __ip_vs_service_get(usvc.protocol, usvc.addr,
2609 usvc.port);
2610}
2611
2612static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2613{
2614 struct nlattr *nl_dest;
2615
2616 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2617 if (!nl_dest)
2618 return -EMSGSIZE;
2619
2620 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2621 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2622
2623 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2624 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2625 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2626 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2627 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2628 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2629 atomic_read(&dest->activeconns));
2630 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2631 atomic_read(&dest->inactconns));
2632 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2633 atomic_read(&dest->persistconns));
2634
2635 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2636 goto nla_put_failure;
2637
2638 nla_nest_end(skb, nl_dest);
2639
2640 return 0;
2641
2642nla_put_failure:
2643 nla_nest_cancel(skb, nl_dest);
2644 return -EMSGSIZE;
2645}
2646
2647static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2648 struct netlink_callback *cb)
2649{
2650 void *hdr;
2651
2652 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2653 &ip_vs_genl_family, NLM_F_MULTI,
2654 IPVS_CMD_NEW_DEST);
2655 if (!hdr)
2656 return -EMSGSIZE;
2657
2658 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2659 goto nla_put_failure;
2660
2661 return genlmsg_end(skb, hdr);
2662
2663nla_put_failure:
2664 genlmsg_cancel(skb, hdr);
2665 return -EMSGSIZE;
2666}
2667
2668static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2669 struct netlink_callback *cb)
2670{
2671 int idx = 0;
2672 int start = cb->args[0];
2673 struct ip_vs_service *svc;
2674 struct ip_vs_dest *dest;
2675 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2676
2677 mutex_lock(&__ip_vs_mutex);
2678
2679 /* Try to find the service for which to dump destinations */
2680 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2681 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2682 goto out_err;
2683
2684 svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
2685 if (IS_ERR(svc) || svc == NULL)
2686 goto out_err;
2687
2688 /* Dump the destinations */
2689 list_for_each_entry(dest, &svc->destinations, n_list) {
2690 if (++idx <= start)
2691 continue;
2692 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2693 idx--;
2694 goto nla_put_failure;
2695 }
2696 }
2697
2698nla_put_failure:
2699 cb->args[0] = idx;
2700 ip_vs_service_put(svc);
2701
2702out_err:
2703 mutex_unlock(&__ip_vs_mutex);
2704
2705 return skb->len;
2706}
2707
2708static int ip_vs_genl_parse_dest(struct ip_vs_dest_user *udest,
2709 struct nlattr *nla, int full_entry)
2710{
2711 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2712 struct nlattr *nla_addr, *nla_port;
2713
2714 /* Parse mandatory identifying destination fields first */
2715 if (nla == NULL ||
2716 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2717 return -EINVAL;
2718
2719 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
2720 nla_port = attrs[IPVS_DEST_ATTR_PORT];
2721
2722 if (!(nla_addr && nla_port))
2723 return -EINVAL;
2724
2725 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2726 udest->port = nla_get_u16(nla_port);
2727
2728 /* If a full entry was requested, check for the additional fields */
2729 if (full_entry) {
2730 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2731 *nla_l_thresh;
2732
2733 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
2734 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
2735 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
2736 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
2737
2738 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
2739 return -EINVAL;
2740
2741 udest->conn_flags = nla_get_u32(nla_fwd)
2742 & IP_VS_CONN_F_FWD_MASK;
2743 udest->weight = nla_get_u32(nla_weight);
2744 udest->u_threshold = nla_get_u32(nla_u_thresh);
2745 udest->l_threshold = nla_get_u32(nla_l_thresh);
2746 }
2747
2748 return 0;
2749}
2750
2751static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
2752 const char *mcast_ifn, __be32 syncid)
2753{
2754 struct nlattr *nl_daemon;
2755
2756 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
2757 if (!nl_daemon)
2758 return -EMSGSIZE;
2759
2760 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
2761 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
2762 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
2763
2764 nla_nest_end(skb, nl_daemon);
2765
2766 return 0;
2767
2768nla_put_failure:
2769 nla_nest_cancel(skb, nl_daemon);
2770 return -EMSGSIZE;
2771}
2772
2773static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
2774 const char *mcast_ifn, __be32 syncid,
2775 struct netlink_callback *cb)
2776{
2777 void *hdr;
2778 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2779 &ip_vs_genl_family, NLM_F_MULTI,
2780 IPVS_CMD_NEW_DAEMON);
2781 if (!hdr)
2782 return -EMSGSIZE;
2783
2784 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
2785 goto nla_put_failure;
2786
2787 return genlmsg_end(skb, hdr);
2788
2789nla_put_failure:
2790 genlmsg_cancel(skb, hdr);
2791 return -EMSGSIZE;
2792}
2793
2794static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
2795 struct netlink_callback *cb)
2796{
2797 mutex_lock(&__ip_vs_mutex);
2798 if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
2799 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
2800 ip_vs_master_mcast_ifn,
2801 ip_vs_master_syncid, cb) < 0)
2802 goto nla_put_failure;
2803
2804 cb->args[0] = 1;
2805 }
2806
2807 if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
2808 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
2809 ip_vs_backup_mcast_ifn,
2810 ip_vs_backup_syncid, cb) < 0)
2811 goto nla_put_failure;
2812
2813 cb->args[1] = 1;
2814 }
2815
2816nla_put_failure:
2817 mutex_unlock(&__ip_vs_mutex);
2818
2819 return skb->len;
2820}
2821
2822static int ip_vs_genl_new_daemon(struct nlattr **attrs)
2823{
2824 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
2825 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
2826 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
2827 return -EINVAL;
2828
2829 return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
2830 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
2831 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
2832}
2833
2834static int ip_vs_genl_del_daemon(struct nlattr **attrs)
2835{
2836 if (!attrs[IPVS_DAEMON_ATTR_STATE])
2837 return -EINVAL;
2838
2839 return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
2840}
2841
2842static int ip_vs_genl_set_config(struct nlattr **attrs)
2843{
2844 struct ip_vs_timeout_user t;
2845
2846 __ip_vs_get_timeouts(&t);
2847
2848 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
2849 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
2850
2851 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
2852 t.tcp_fin_timeout =
2853 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
2854
2855 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
2856 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
2857
2858 return ip_vs_set_timeout(&t);
2859}
2860
2861static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
2862{
2863 struct ip_vs_service *svc = NULL;
2864 struct ip_vs_service_user usvc;
2865 struct ip_vs_dest_user udest;
2866 int ret = 0, cmd;
2867 int need_full_svc = 0, need_full_dest = 0;
2868
2869 cmd = info->genlhdr->cmd;
2870
2871 mutex_lock(&__ip_vs_mutex);
2872
2873 if (cmd == IPVS_CMD_FLUSH) {
2874 ret = ip_vs_flush();
2875 goto out;
2876 } else if (cmd == IPVS_CMD_SET_CONFIG) {
2877 ret = ip_vs_genl_set_config(info->attrs);
2878 goto out;
2879 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
2880 cmd == IPVS_CMD_DEL_DAEMON) {
2881
2882 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
2883
2884 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
2885 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
2886 info->attrs[IPVS_CMD_ATTR_DAEMON],
2887 ip_vs_daemon_policy)) {
2888 ret = -EINVAL;
2889 goto out;
2890 }
2891
2892 if (cmd == IPVS_CMD_NEW_DAEMON)
2893 ret = ip_vs_genl_new_daemon(daemon_attrs);
2894 else
2895 ret = ip_vs_genl_del_daemon(daemon_attrs);
2896 goto out;
2897 } else if (cmd == IPVS_CMD_ZERO &&
2898 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
2899 ret = ip_vs_zero_all();
2900 goto out;
2901 }
2902
2903 /* All following commands require a service argument, so check if we
2904 * received a valid one. We need a full service specification when
2905 * adding / editing a service. Only identifying members otherwise. */
2906 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
2907 need_full_svc = 1;
2908
2909 ret = ip_vs_genl_parse_service(&usvc,
2910 info->attrs[IPVS_CMD_ATTR_SERVICE],
2911 need_full_svc);
2912 if (ret)
2913 goto out;
2914
2915 /* Lookup the exact service by <protocol, addr, port> or fwmark */
2916 if (usvc.fwmark == 0)
2917 svc = __ip_vs_service_get(usvc.protocol, usvc.addr, usvc.port);
2918 else
2919 svc = __ip_vs_svc_fwm_get(usvc.fwmark);
2920
2921 /* Unless we're adding a new service, the service must already exist */
2922 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
2923 ret = -ESRCH;
2924 goto out;
2925 }
2926
2927 /* Destination commands require a valid destination argument. For
2928 * adding / editing a destination, we need a full destination
2929 * specification. */
2930 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
2931 cmd == IPVS_CMD_DEL_DEST) {
2932 if (cmd != IPVS_CMD_DEL_DEST)
2933 need_full_dest = 1;
2934
2935 ret = ip_vs_genl_parse_dest(&udest,
2936 info->attrs[IPVS_CMD_ATTR_DEST],
2937 need_full_dest);
2938 if (ret)
2939 goto out;
2940 }
2941
2942 switch (cmd) {
2943 case IPVS_CMD_NEW_SERVICE:
2944 if (svc == NULL)
2945 ret = ip_vs_add_service(&usvc, &svc);
2946 else
2947 ret = -EEXIST;
2948 break;
2949 case IPVS_CMD_SET_SERVICE:
2950 ret = ip_vs_edit_service(svc, &usvc);
2951 break;
2952 case IPVS_CMD_DEL_SERVICE:
2953 ret = ip_vs_del_service(svc);
2954 break;
2955 case IPVS_CMD_NEW_DEST:
2956 ret = ip_vs_add_dest(svc, &udest);
2957 break;
2958 case IPVS_CMD_SET_DEST:
2959 ret = ip_vs_edit_dest(svc, &udest);
2960 break;
2961 case IPVS_CMD_DEL_DEST:
2962 ret = ip_vs_del_dest(svc, &udest);
2963 break;
2964 case IPVS_CMD_ZERO:
2965 ret = ip_vs_zero_service(svc);
2966 break;
2967 default:
2968 ret = -EINVAL;
2969 }
2970
2971out:
2972 if (svc)
2973 ip_vs_service_put(svc);
2974 mutex_unlock(&__ip_vs_mutex);
2975
2976 return ret;
2977}
2978
2979static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
2980{
2981 struct sk_buff *msg;
2982 void *reply;
2983 int ret, cmd, reply_cmd;
2984
2985 cmd = info->genlhdr->cmd;
2986
2987 if (cmd == IPVS_CMD_GET_SERVICE)
2988 reply_cmd = IPVS_CMD_NEW_SERVICE;
2989 else if (cmd == IPVS_CMD_GET_INFO)
2990 reply_cmd = IPVS_CMD_SET_INFO;
2991 else if (cmd == IPVS_CMD_GET_CONFIG)
2992 reply_cmd = IPVS_CMD_SET_CONFIG;
2993 else {
2994 IP_VS_ERR("unknown Generic Netlink command\n");
2995 return -EINVAL;
2996 }
2997
2998 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2999 if (!msg)
3000 return -ENOMEM;
3001
3002 mutex_lock(&__ip_vs_mutex);
3003
3004 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3005 if (reply == NULL)
3006 goto nla_put_failure;
3007
3008 switch (cmd) {
3009 case IPVS_CMD_GET_SERVICE:
3010 {
3011 struct ip_vs_service *svc;
3012
3013 svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
3014 if (IS_ERR(svc)) {
3015 ret = PTR_ERR(svc);
3016 goto out_err;
3017 } else if (svc) {
3018 ret = ip_vs_genl_fill_service(msg, svc);
3019 ip_vs_service_put(svc);
3020 if (ret)
3021 goto nla_put_failure;
3022 } else {
3023 ret = -ESRCH;
3024 goto out_err;
3025 }
3026
3027 break;
3028 }
3029
3030 case IPVS_CMD_GET_CONFIG:
3031 {
3032 struct ip_vs_timeout_user t;
3033
3034 __ip_vs_get_timeouts(&t);
3035#ifdef CONFIG_IP_VS_PROTO_TCP
3036 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3037 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3038 t.tcp_fin_timeout);
3039#endif
3040#ifdef CONFIG_IP_VS_PROTO_UDP
3041 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3042#endif
3043
3044 break;
3045 }
3046
3047 case IPVS_CMD_GET_INFO:
3048 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3049 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3050 IP_VS_CONN_TAB_SIZE);
3051 break;
3052 }
3053
3054 genlmsg_end(msg, reply);
3055 ret = genlmsg_unicast(msg, info->snd_pid);
3056 goto out;
3057
3058nla_put_failure:
3059 IP_VS_ERR("not enough space in Netlink message\n");
3060 ret = -EMSGSIZE;
3061
3062out_err:
3063 nlmsg_free(msg);
3064out:
3065 mutex_unlock(&__ip_vs_mutex);
3066
3067 return ret;
3068}
3069
3070
3071static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3072 {
3073 .cmd = IPVS_CMD_NEW_SERVICE,
3074 .flags = GENL_ADMIN_PERM,
3075 .policy = ip_vs_cmd_policy,
3076 .doit = ip_vs_genl_set_cmd,
3077 },
3078 {
3079 .cmd = IPVS_CMD_SET_SERVICE,
3080 .flags = GENL_ADMIN_PERM,
3081 .policy = ip_vs_cmd_policy,
3082 .doit = ip_vs_genl_set_cmd,
3083 },
3084 {
3085 .cmd = IPVS_CMD_DEL_SERVICE,
3086 .flags = GENL_ADMIN_PERM,
3087 .policy = ip_vs_cmd_policy,
3088 .doit = ip_vs_genl_set_cmd,
3089 },
3090 {
3091 .cmd = IPVS_CMD_GET_SERVICE,
3092 .flags = GENL_ADMIN_PERM,
3093 .doit = ip_vs_genl_get_cmd,
3094 .dumpit = ip_vs_genl_dump_services,
3095 .policy = ip_vs_cmd_policy,
3096 },
3097 {
3098 .cmd = IPVS_CMD_NEW_DEST,
3099 .flags = GENL_ADMIN_PERM,
3100 .policy = ip_vs_cmd_policy,
3101 .doit = ip_vs_genl_set_cmd,
3102 },
3103 {
3104 .cmd = IPVS_CMD_SET_DEST,
3105 .flags = GENL_ADMIN_PERM,
3106 .policy = ip_vs_cmd_policy,
3107 .doit = ip_vs_genl_set_cmd,
3108 },
3109 {
3110 .cmd = IPVS_CMD_DEL_DEST,
3111 .flags = GENL_ADMIN_PERM,
3112 .policy = ip_vs_cmd_policy,
3113 .doit = ip_vs_genl_set_cmd,
3114 },
3115 {
3116 .cmd = IPVS_CMD_GET_DEST,
3117 .flags = GENL_ADMIN_PERM,
3118 .policy = ip_vs_cmd_policy,
3119 .dumpit = ip_vs_genl_dump_dests,
3120 },
3121 {
3122 .cmd = IPVS_CMD_NEW_DAEMON,
3123 .flags = GENL_ADMIN_PERM,
3124 .policy = ip_vs_cmd_policy,
3125 .doit = ip_vs_genl_set_cmd,
3126 },
3127 {
3128 .cmd = IPVS_CMD_DEL_DAEMON,
3129 .flags = GENL_ADMIN_PERM,
3130 .policy = ip_vs_cmd_policy,
3131 .doit = ip_vs_genl_set_cmd,
3132 },
3133 {
3134 .cmd = IPVS_CMD_GET_DAEMON,
3135 .flags = GENL_ADMIN_PERM,
3136 .dumpit = ip_vs_genl_dump_daemons,
3137 },
3138 {
3139 .cmd = IPVS_CMD_SET_CONFIG,
3140 .flags = GENL_ADMIN_PERM,
3141 .policy = ip_vs_cmd_policy,
3142 .doit = ip_vs_genl_set_cmd,
3143 },
3144 {
3145 .cmd = IPVS_CMD_GET_CONFIG,
3146 .flags = GENL_ADMIN_PERM,
3147 .doit = ip_vs_genl_get_cmd,
3148 },
3149 {
3150 .cmd = IPVS_CMD_GET_INFO,
3151 .flags = GENL_ADMIN_PERM,
3152 .doit = ip_vs_genl_get_cmd,
3153 },
3154 {
3155 .cmd = IPVS_CMD_ZERO,
3156 .flags = GENL_ADMIN_PERM,
3157 .policy = ip_vs_cmd_policy,
3158 .doit = ip_vs_genl_set_cmd,
3159 },
3160 {
3161 .cmd = IPVS_CMD_FLUSH,
3162 .flags = GENL_ADMIN_PERM,
3163 .doit = ip_vs_genl_set_cmd,
3164 },
3165};
3166
3167static int __init ip_vs_genl_register(void)
3168{
3169 int ret, i;
3170
3171 ret = genl_register_family(&ip_vs_genl_family);
3172 if (ret)
3173 return ret;
3174
3175 for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
3176 ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
3177 if (ret)
3178 goto err_out;
3179 }
3180 return 0;
3181
3182err_out:
3183 genl_unregister_family(&ip_vs_genl_family);
3184 return ret;
3185}
3186
3187static void ip_vs_genl_unregister(void)
3188{
3189 genl_unregister_family(&ip_vs_genl_family);
3190}
3191
3192/* End of Generic Netlink interface definitions */
3193
1da177e4 3194
048cf48b 3195int __init ip_vs_control_init(void)
3196{
3197 int ret;
3198 int idx;
3199
3200 EnterFunction(2);
3201
3202 ret = nf_register_sockopt(&ip_vs_sockopts);
3203 if (ret) {
3204 IP_VS_ERR("cannot register sockopt.\n");
3205 return ret;
3206 }
3207
3208 ret = ip_vs_genl_register();
3209 if (ret) {
3210 IP_VS_ERR("cannot register Generic Netlink interface.\n");
3211 nf_unregister_sockopt(&ip_vs_sockopts);
3212 return ret;
3213 }
3214
3215 proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
3216 proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
1da177e4 3217
90754f8e 3218 sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
3219
3220 /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3221 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3222 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3223 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3224 }
3225 for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) {
3226 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3227 }
3228
3229 ip_vs_new_estimator(&ip_vs_stats);
3230
3231 /* Hook the defense timer */
3232 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3233
3234 LeaveFunction(2);
3235 return 0;
3236}
3237
3238
3239void ip_vs_control_cleanup(void)
3240{
3241 EnterFunction(2);
3242 ip_vs_trash_cleanup();
3243 cancel_rearming_delayed_work(&defense_work);
28e53bdd 3244 cancel_work_sync(&defense_work.work);
3245 ip_vs_kill_estimator(&ip_vs_stats);
3246 unregister_sysctl_table(sysctl_header);
3247 proc_net_remove(&init_net, "ip_vs_stats");
3248 proc_net_remove(&init_net, "ip_vs");
9a812198 3249 ip_vs_genl_unregister();
3250 nf_unregister_sockopt(&ip_vs_sockopts);
3251 LeaveFunction(2);
3252}