]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/ipvs/ip_vs_ctl.c
IPVS: Add genetlink interface implementation
[net-next-2.6.git] / net / ipv4 / ipvs / ip_vs_ctl.c
CommitLineData
1da177e4
LT
1/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
1da177e4
LT
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
21#include <linux/module.h>
22#include <linux/init.h>
23#include <linux/types.h>
4fc268d2 24#include <linux/capability.h>
1da177e4
LT
25#include <linux/fs.h>
26#include <linux/sysctl.h>
27#include <linux/proc_fs.h>
28#include <linux/workqueue.h>
29#include <linux/swap.h>
1da177e4
LT
30#include <linux/seq_file.h>
31
32#include <linux/netfilter.h>
33#include <linux/netfilter_ipv4.h>
14cc3e2b 34#include <linux/mutex.h>
1da177e4 35
457c4cbc 36#include <net/net_namespace.h>
1da177e4 37#include <net/ip.h>
14c85021 38#include <net/route.h>
1da177e4 39#include <net/sock.h>
9a812198 40#include <net/genetlink.h>
1da177e4
LT
41
42#include <asm/uaccess.h>
43
44#include <net/ip_vs.h>
45
/*
 *	Serializes the IPVS sockopt handlers.  A mutex is used because
 *	[gs]etsockopt may sleep.
 */
static DEFINE_MUTEX(__ip_vs_mutex);

/* protects the virtual service tables */
static DEFINE_RWLOCK(__ip_vs_svc_lock);

/* protects the table of real services (ip_vs_rtable) */
static DEFINE_RWLOCK(__ip_vs_rs_lock);

/* protects the state and timeout tables (secure_tcp handling) */
static DEFINE_RWLOCK(__ip_vs_securetcp_lock);

/* protects the drop_entry defense state */
static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);

/* protects the drop_packet defense state */
static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
63
64/* 1/rate drop and drop-entry variables */
65int ip_vs_drop_rate = 0;
66int ip_vs_drop_counter = 0;
67static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
68
69/* number of virtual services */
70static int ip_vs_num_services = 0;
71
/*
 *	Tunables backing the IPVS sysctl entries.
 *
 *	drop_entry, drop_packet and secure_tcp take the modes 0..3 that
 *	update_defense_level() switches on; amemthresh is compared with the
 *	available memory (in pages) there and am_droprate is the drop rate
 *	used when drop_packet is in mode 3.
 */
static int sysctl_ip_vs_drop_entry = 0;
static int sysctl_ip_vs_drop_packet = 0;
static int sysctl_ip_vs_secure_tcp = 0;
static int sysctl_ip_vs_amemthresh = 1024;
static int sysctl_ip_vs_am_droprate = 10;
int sysctl_ip_vs_cache_bypass = 0;
int sysctl_ip_vs_expire_nodest_conn = 0;
int sysctl_ip_vs_expire_quiescent_template = 0;
int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
int sysctl_ip_vs_nat_icmp_send = 0;
83
84
#ifdef CONFIG_IP_VS_DEBUG
/* debug verbosity; only present in CONFIG_IP_VS_DEBUG builds */
static int sysctl_ip_vs_debug_level = 0;

/*
 *	Accessor for the current debug level, so code outside this file
 *	does not need direct access to the static variable.
 */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
#endif
93
94/*
af9debd4
JA
95 * update_defense_level is called from keventd and from sysctl,
96 * so it needs to protect itself from softirqs
1da177e4
LT
97 */
98static void update_defense_level(void)
99{
100 struct sysinfo i;
101 static int old_secure_tcp = 0;
102 int availmem;
103 int nomem;
104 int to_change = -1;
105
106 /* we only count free and buffered memory (in pages) */
107 si_meminfo(&i);
108 availmem = i.freeram + i.bufferram;
109 /* however in linux 2.5 the i.bufferram is total page cache size,
110 we need adjust it */
111 /* si_swapinfo(&i); */
112 /* availmem = availmem - (i.totalswap - i.freeswap); */
113
114 nomem = (availmem < sysctl_ip_vs_amemthresh);
115
af9debd4
JA
116 local_bh_disable();
117
1da177e4
LT
118 /* drop_entry */
119 spin_lock(&__ip_vs_dropentry_lock);
120 switch (sysctl_ip_vs_drop_entry) {
121 case 0:
122 atomic_set(&ip_vs_dropentry, 0);
123 break;
124 case 1:
125 if (nomem) {
126 atomic_set(&ip_vs_dropentry, 1);
127 sysctl_ip_vs_drop_entry = 2;
128 } else {
129 atomic_set(&ip_vs_dropentry, 0);
130 }
131 break;
132 case 2:
133 if (nomem) {
134 atomic_set(&ip_vs_dropentry, 1);
135 } else {
136 atomic_set(&ip_vs_dropentry, 0);
137 sysctl_ip_vs_drop_entry = 1;
138 };
139 break;
140 case 3:
141 atomic_set(&ip_vs_dropentry, 1);
142 break;
143 }
144 spin_unlock(&__ip_vs_dropentry_lock);
145
146 /* drop_packet */
147 spin_lock(&__ip_vs_droppacket_lock);
148 switch (sysctl_ip_vs_drop_packet) {
149 case 0:
150 ip_vs_drop_rate = 0;
151 break;
152 case 1:
153 if (nomem) {
154 ip_vs_drop_rate = ip_vs_drop_counter
155 = sysctl_ip_vs_amemthresh /
156 (sysctl_ip_vs_amemthresh-availmem);
157 sysctl_ip_vs_drop_packet = 2;
158 } else {
159 ip_vs_drop_rate = 0;
160 }
161 break;
162 case 2:
163 if (nomem) {
164 ip_vs_drop_rate = ip_vs_drop_counter
165 = sysctl_ip_vs_amemthresh /
166 (sysctl_ip_vs_amemthresh-availmem);
167 } else {
168 ip_vs_drop_rate = 0;
169 sysctl_ip_vs_drop_packet = 1;
170 }
171 break;
172 case 3:
173 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
174 break;
175 }
176 spin_unlock(&__ip_vs_droppacket_lock);
177
178 /* secure_tcp */
179 write_lock(&__ip_vs_securetcp_lock);
180 switch (sysctl_ip_vs_secure_tcp) {
181 case 0:
182 if (old_secure_tcp >= 2)
183 to_change = 0;
184 break;
185 case 1:
186 if (nomem) {
187 if (old_secure_tcp < 2)
188 to_change = 1;
189 sysctl_ip_vs_secure_tcp = 2;
190 } else {
191 if (old_secure_tcp >= 2)
192 to_change = 0;
193 }
194 break;
195 case 2:
196 if (nomem) {
197 if (old_secure_tcp < 2)
198 to_change = 1;
199 } else {
200 if (old_secure_tcp >= 2)
201 to_change = 0;
202 sysctl_ip_vs_secure_tcp = 1;
203 }
204 break;
205 case 3:
206 if (old_secure_tcp < 2)
207 to_change = 1;
208 break;
209 }
210 old_secure_tcp = sysctl_ip_vs_secure_tcp;
211 if (to_change >= 0)
212 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
213 write_unlock(&__ip_vs_securetcp_lock);
af9debd4
JA
214
215 local_bh_enable();
1da177e4
LT
216}
217
218
219/*
220 * Timer for checking the defense
221 */
222#define DEFENSE_TIMER_PERIOD 1*HZ
c4028958
DH
223static void defense_work_handler(struct work_struct *work);
224static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
1da177e4 225
c4028958 226static void defense_work_handler(struct work_struct *work)
1da177e4
LT
227{
228 update_defense_level();
229 if (atomic_read(&ip_vs_dropentry))
230 ip_vs_random_dropentry();
231
232 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
233}
234
235int
236ip_vs_use_count_inc(void)
237{
238 return try_module_get(THIS_MODULE);
239}
240
241void
242ip_vs_use_count_dec(void)
243{
244 module_put(THIS_MODULE);
245}
246
247
248/*
249 * Hash table: for virtual service lookups
250 */
251#define IP_VS_SVC_TAB_BITS 8
252#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
253#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
254
255/* the service table hashed by <protocol, addr, port> */
256static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
257/* the service table hashed by fwmark */
258static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
259
260/*
261 * Hash table: for real service lookups
262 */
263#define IP_VS_RTAB_BITS 4
264#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
265#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
266
267static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
268
269/*
270 * Trash for destinations
271 */
272static LIST_HEAD(ip_vs_dest_trash);
273
274/*
275 * FTP & NULL virtual service counters
276 */
277static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
278static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
279
280
281/*
282 * Returns hash value for virtual service
283 */
284static __inline__ unsigned
014d730d 285ip_vs_svc_hashkey(unsigned proto, __be32 addr, __be16 port)
1da177e4
LT
286{
287 register unsigned porth = ntohs(port);
288
289 return (proto^ntohl(addr)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
290 & IP_VS_SVC_TAB_MASK;
291}
292
293/*
294 * Returns hash value of fwmark for virtual service lookup
295 */
296static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
297{
298 return fwmark & IP_VS_SVC_TAB_MASK;
299}
300
301/*
302 * Hashes a service in the ip_vs_svc_table by <proto,addr,port>
303 * or in the ip_vs_svc_fwm_table by fwmark.
304 * Should be called with locked tables.
305 */
306static int ip_vs_svc_hash(struct ip_vs_service *svc)
307{
308 unsigned hash;
309
310 if (svc->flags & IP_VS_SVC_F_HASHED) {
311 IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
312 "called from %p\n", __builtin_return_address(0));
313 return 0;
314 }
315
316 if (svc->fwmark == 0) {
317 /*
318 * Hash it by <protocol,addr,port> in ip_vs_svc_table
319 */
320 hash = ip_vs_svc_hashkey(svc->protocol, svc->addr, svc->port);
321 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
322 } else {
323 /*
324 * Hash it by fwmark in ip_vs_svc_fwm_table
325 */
326 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
327 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
328 }
329
330 svc->flags |= IP_VS_SVC_F_HASHED;
331 /* increase its refcnt because it is referenced by the svc table */
332 atomic_inc(&svc->refcnt);
333 return 1;
334}
335
336
337/*
338 * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
339 * Should be called with locked tables.
340 */
341static int ip_vs_svc_unhash(struct ip_vs_service *svc)
342{
343 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
344 IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
345 "called from %p\n", __builtin_return_address(0));
346 return 0;
347 }
348
349 if (svc->fwmark == 0) {
350 /* Remove it from the ip_vs_svc_table table */
351 list_del(&svc->s_list);
352 } else {
353 /* Remove it from the ip_vs_svc_fwm_table table */
354 list_del(&svc->f_list);
355 }
356
357 svc->flags &= ~IP_VS_SVC_F_HASHED;
358 atomic_dec(&svc->refcnt);
359 return 1;
360}
361
362
363/*
364 * Get service by {proto,addr,port} in the service table.
365 */
366static __inline__ struct ip_vs_service *
014d730d 367__ip_vs_service_get(__u16 protocol, __be32 vaddr, __be16 vport)
1da177e4
LT
368{
369 unsigned hash;
370 struct ip_vs_service *svc;
371
372 /* Check for "full" addressed entries */
373 hash = ip_vs_svc_hashkey(protocol, vaddr, vport);
374
375 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
376 if ((svc->addr == vaddr)
377 && (svc->port == vport)
378 && (svc->protocol == protocol)) {
379 /* HIT */
380 atomic_inc(&svc->usecnt);
381 return svc;
382 }
383 }
384
385 return NULL;
386}
387
388
389/*
390 * Get service by {fwmark} in the service table.
391 */
392static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark)
393{
394 unsigned hash;
395 struct ip_vs_service *svc;
396
397 /* Check for fwmark addressed entries */
398 hash = ip_vs_svc_fwm_hashkey(fwmark);
399
400 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
401 if (svc->fwmark == fwmark) {
402 /* HIT */
403 atomic_inc(&svc->usecnt);
404 return svc;
405 }
406 }
407
408 return NULL;
409}
410
411struct ip_vs_service *
014d730d 412ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport)
1da177e4
LT
413{
414 struct ip_vs_service *svc;
415
416 read_lock(&__ip_vs_svc_lock);
417
418 /*
419 * Check the table hashed by fwmark first
420 */
421 if (fwmark && (svc = __ip_vs_svc_fwm_get(fwmark)))
422 goto out;
423
424 /*
425 * Check the table hashed by <protocol,addr,port>
426 * for "full" addressed entries
427 */
428 svc = __ip_vs_service_get(protocol, vaddr, vport);
429
430 if (svc == NULL
431 && protocol == IPPROTO_TCP
432 && atomic_read(&ip_vs_ftpsvc_counter)
433 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
434 /*
435 * Check if ftp service entry exists, the packet
436 * might belong to FTP data connections.
437 */
438 svc = __ip_vs_service_get(protocol, vaddr, FTPPORT);
439 }
440
441 if (svc == NULL
442 && atomic_read(&ip_vs_nullsvc_counter)) {
443 /*
444 * Check if the catch-all port (port zero) exists
445 */
446 svc = __ip_vs_service_get(protocol, vaddr, 0);
447 }
448
449 out:
450 read_unlock(&__ip_vs_svc_lock);
451
4b5bdf5c 452 IP_VS_DBG(9, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n",
1da177e4
LT
453 fwmark, ip_vs_proto_name(protocol),
454 NIPQUAD(vaddr), ntohs(vport),
455 svc?"hit":"not hit");
456
457 return svc;
458}
459
460
461static inline void
462__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
463{
464 atomic_inc(&svc->refcnt);
465 dest->svc = svc;
466}
467
468static inline void
469__ip_vs_unbind_svc(struct ip_vs_dest *dest)
470{
471 struct ip_vs_service *svc = dest->svc;
472
473 dest->svc = NULL;
474 if (atomic_dec_and_test(&svc->refcnt))
475 kfree(svc);
476}
477
478
479/*
480 * Returns hash value for real service
481 */
014d730d 482static __inline__ unsigned ip_vs_rs_hashkey(__be32 addr, __be16 port)
1da177e4
LT
483{
484 register unsigned porth = ntohs(port);
485
486 return (ntohl(addr)^(porth>>IP_VS_RTAB_BITS)^porth)
487 & IP_VS_RTAB_MASK;
488}
489
490/*
491 * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
492 * should be called with locked tables.
493 */
494static int ip_vs_rs_hash(struct ip_vs_dest *dest)
495{
496 unsigned hash;
497
498 if (!list_empty(&dest->d_list)) {
499 return 0;
500 }
501
502 /*
503 * Hash by proto,addr,port,
504 * which are the parameters of the real service.
505 */
506 hash = ip_vs_rs_hashkey(dest->addr, dest->port);
507 list_add(&dest->d_list, &ip_vs_rtable[hash]);
508
509 return 1;
510}
511
512/*
513 * UNhashes ip_vs_dest from ip_vs_rtable.
514 * should be called with locked tables.
515 */
516static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
517{
518 /*
519 * Remove it from the ip_vs_rtable table.
520 */
521 if (!list_empty(&dest->d_list)) {
522 list_del(&dest->d_list);
523 INIT_LIST_HEAD(&dest->d_list);
524 }
525
526 return 1;
527}
528
529/*
530 * Lookup real service by <proto,addr,port> in the real service table.
531 */
532struct ip_vs_dest *
014d730d 533ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport)
1da177e4
LT
534{
535 unsigned hash;
536 struct ip_vs_dest *dest;
537
538 /*
539 * Check for "full" addressed entries
540 * Return the first found entry
541 */
542 hash = ip_vs_rs_hashkey(daddr, dport);
543
544 read_lock(&__ip_vs_rs_lock);
545 list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
546 if ((dest->addr == daddr)
547 && (dest->port == dport)
548 && ((dest->protocol == protocol) ||
549 dest->vfwmark)) {
550 /* HIT */
551 read_unlock(&__ip_vs_rs_lock);
552 return dest;
553 }
554 }
555 read_unlock(&__ip_vs_rs_lock);
556
557 return NULL;
558}
559
560/*
561 * Lookup destination by {addr,port} in the given service
562 */
563static struct ip_vs_dest *
014d730d 564ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
1da177e4
LT
565{
566 struct ip_vs_dest *dest;
567
568 /*
569 * Find the destination for the given service
570 */
571 list_for_each_entry(dest, &svc->destinations, n_list) {
572 if ((dest->addr == daddr) && (dest->port == dport)) {
573 /* HIT */
574 return dest;
575 }
576 }
577
578 return NULL;
579}
580
1e356f9c
RB
581/*
582 * Find destination by {daddr,dport,vaddr,protocol}
583 * Cretaed to be used in ip_vs_process_message() in
584 * the backup synchronization daemon. It finds the
585 * destination to be bound to the received connection
586 * on the backup.
587 *
588 * ip_vs_lookup_real_service() looked promissing, but
589 * seems not working as expected.
590 */
591struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport,
592 __be32 vaddr, __be16 vport, __u16 protocol)
593{
594 struct ip_vs_dest *dest;
595 struct ip_vs_service *svc;
596
597 svc = ip_vs_service_get(0, protocol, vaddr, vport);
598 if (!svc)
599 return NULL;
600 dest = ip_vs_lookup_dest(svc, daddr, dport);
601 if (dest)
602 atomic_inc(&dest->refcnt);
603 ip_vs_service_put(svc);
604 return dest;
605}
1da177e4
LT
606
607/*
608 * Lookup dest by {svc,addr,port} in the destination trash.
609 * The destination trash is used to hold the destinations that are removed
610 * from the service table but are still referenced by some conn entries.
611 * The reason to add the destination trash is when the dest is temporary
612 * down (either by administrator or by monitor program), the dest can be
613 * picked back from the trash, the remaining connections to the dest can
614 * continue, and the counting information of the dest is also useful for
615 * scheduling.
616 */
617static struct ip_vs_dest *
014d730d 618ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
1da177e4
LT
619{
620 struct ip_vs_dest *dest, *nxt;
621
622 /*
623 * Find the destination in trash
624 */
625 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
626 IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, "
4b5bdf5c 627 "dest->refcnt=%d\n",
1da177e4
LT
628 dest->vfwmark,
629 NIPQUAD(dest->addr), ntohs(dest->port),
630 atomic_read(&dest->refcnt));
631 if (dest->addr == daddr &&
632 dest->port == dport &&
633 dest->vfwmark == svc->fwmark &&
634 dest->protocol == svc->protocol &&
635 (svc->fwmark ||
636 (dest->vaddr == svc->addr &&
637 dest->vport == svc->port))) {
638 /* HIT */
639 return dest;
640 }
641
642 /*
643 * Try to purge the destination from trash if not referenced
644 */
645 if (atomic_read(&dest->refcnt) == 1) {
646 IP_VS_DBG(3, "Removing destination %u/%u.%u.%u.%u:%u "
647 "from trash\n",
648 dest->vfwmark,
649 NIPQUAD(dest->addr), ntohs(dest->port));
650 list_del(&dest->n_list);
651 ip_vs_dst_reset(dest);
652 __ip_vs_unbind_svc(dest);
653 kfree(dest);
654 }
655 }
656
657 return NULL;
658}
659
660
661/*
662 * Clean up all the destinations in the trash
663 * Called by the ip_vs_control_cleanup()
664 *
665 * When the ip_vs_control_clearup is activated by ipvs module exit,
666 * the service tables must have been flushed and all the connections
667 * are expired, and the refcnt of each destination in the trash must
668 * be 1, so we simply release them here.
669 */
670static void ip_vs_trash_cleanup(void)
671{
672 struct ip_vs_dest *dest, *nxt;
673
674 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
675 list_del(&dest->n_list);
676 ip_vs_dst_reset(dest);
677 __ip_vs_unbind_svc(dest);
678 kfree(dest);
679 }
680}
681
682
683static void
684ip_vs_zero_stats(struct ip_vs_stats *stats)
685{
686 spin_lock_bh(&stats->lock);
e93615d0
SH
687
688 stats->conns = 0;
689 stats->inpkts = 0;
690 stats->outpkts = 0;
691 stats->inbytes = 0;
692 stats->outbytes = 0;
693
694 stats->cps = 0;
695 stats->inpps = 0;
696 stats->outpps = 0;
697 stats->inbps = 0;
698 stats->outbps = 0;
699
1da177e4 700 ip_vs_zero_estimator(stats);
e93615d0 701
3a14a313 702 spin_unlock_bh(&stats->lock);
1da177e4
LT
703}
704
705/*
706 * Update a destination in the given service
707 */
708static void
709__ip_vs_update_dest(struct ip_vs_service *svc,
710 struct ip_vs_dest *dest, struct ip_vs_dest_user *udest)
711{
712 int conn_flags;
713
714 /* set the weight and the flags */
715 atomic_set(&dest->weight, udest->weight);
716 conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
717
718 /* check if local node and update the flags */
6b175b26 719 if (inet_addr_type(&init_net, udest->addr) == RTN_LOCAL) {
1da177e4
LT
720 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
721 | IP_VS_CONN_F_LOCALNODE;
722 }
723
724 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
725 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
726 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
727 } else {
728 /*
729 * Put the real service in ip_vs_rtable if not present.
730 * For now only for NAT!
731 */
732 write_lock_bh(&__ip_vs_rs_lock);
733 ip_vs_rs_hash(dest);
734 write_unlock_bh(&__ip_vs_rs_lock);
735 }
736 atomic_set(&dest->conn_flags, conn_flags);
737
738 /* bind the service */
739 if (!dest->svc) {
740 __ip_vs_bind_svc(dest, svc);
741 } else {
742 if (dest->svc != svc) {
743 __ip_vs_unbind_svc(dest);
744 ip_vs_zero_stats(&dest->stats);
745 __ip_vs_bind_svc(dest, svc);
746 }
747 }
748
749 /* set the dest status flags */
750 dest->flags |= IP_VS_DEST_F_AVAILABLE;
751
752 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
753 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
754 dest->u_threshold = udest->u_threshold;
755 dest->l_threshold = udest->l_threshold;
756}
757
758
759/*
760 * Create a destination for the given service
761 */
762static int
763ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest,
764 struct ip_vs_dest **dest_p)
765{
766 struct ip_vs_dest *dest;
767 unsigned atype;
768
769 EnterFunction(2);
770
6b175b26 771 atype = inet_addr_type(&init_net, udest->addr);
1da177e4
LT
772 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
773 return -EINVAL;
774
0da974f4 775 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
1da177e4
LT
776 if (dest == NULL) {
777 IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
778 return -ENOMEM;
779 }
1da177e4
LT
780
781 dest->protocol = svc->protocol;
782 dest->vaddr = svc->addr;
783 dest->vport = svc->port;
784 dest->vfwmark = svc->fwmark;
785 dest->addr = udest->addr;
786 dest->port = udest->port;
787
788 atomic_set(&dest->activeconns, 0);
789 atomic_set(&dest->inactconns, 0);
790 atomic_set(&dest->persistconns, 0);
791 atomic_set(&dest->refcnt, 0);
792
793 INIT_LIST_HEAD(&dest->d_list);
794 spin_lock_init(&dest->dst_lock);
795 spin_lock_init(&dest->stats.lock);
796 __ip_vs_update_dest(svc, dest, udest);
797 ip_vs_new_estimator(&dest->stats);
798
799 *dest_p = dest;
800
801 LeaveFunction(2);
802 return 0;
803}
804
805
806/*
807 * Add a destination into an existing service
808 */
809static int
810ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
811{
812 struct ip_vs_dest *dest;
014d730d
AV
813 __be32 daddr = udest->addr;
814 __be16 dport = udest->port;
1da177e4
LT
815 int ret;
816
817 EnterFunction(2);
818
819 if (udest->weight < 0) {
820 IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
821 return -ERANGE;
822 }
823
824 if (udest->l_threshold > udest->u_threshold) {
825 IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than "
826 "upper threshold\n");
827 return -ERANGE;
828 }
829
830 /*
831 * Check if the dest already exists in the list
832 */
833 dest = ip_vs_lookup_dest(svc, daddr, dport);
834 if (dest != NULL) {
835 IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
836 return -EEXIST;
837 }
838
839 /*
840 * Check if the dest already exists in the trash and
841 * is from the same service
842 */
843 dest = ip_vs_trash_get_dest(svc, daddr, dport);
844 if (dest != NULL) {
845 IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
4b5bdf5c 846 "dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
1da177e4
LT
847 NIPQUAD(daddr), ntohs(dport),
848 atomic_read(&dest->refcnt),
849 dest->vfwmark,
850 NIPQUAD(dest->vaddr),
851 ntohs(dest->vport));
852 __ip_vs_update_dest(svc, dest, udest);
853
854 /*
855 * Get the destination from the trash
856 */
857 list_del(&dest->n_list);
858
859 ip_vs_new_estimator(&dest->stats);
860
861 write_lock_bh(&__ip_vs_svc_lock);
862
863 /*
864 * Wait until all other svc users go away.
865 */
866 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
867
868 list_add(&dest->n_list, &svc->destinations);
869 svc->num_dests++;
870
871 /* call the update_service function of its scheduler */
872 svc->scheduler->update_service(svc);
873
874 write_unlock_bh(&__ip_vs_svc_lock);
875 return 0;
876 }
877
878 /*
879 * Allocate and initialize the dest structure
880 */
881 ret = ip_vs_new_dest(svc, udest, &dest);
882 if (ret) {
883 return ret;
884 }
885
886 /*
887 * Add the dest entry into the list
888 */
889 atomic_inc(&dest->refcnt);
890
891 write_lock_bh(&__ip_vs_svc_lock);
892
893 /*
894 * Wait until all other svc users go away.
895 */
896 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
897
898 list_add(&dest->n_list, &svc->destinations);
899 svc->num_dests++;
900
901 /* call the update_service function of its scheduler */
902 svc->scheduler->update_service(svc);
903
904 write_unlock_bh(&__ip_vs_svc_lock);
905
906 LeaveFunction(2);
907
908 return 0;
909}
910
911
912/*
913 * Edit a destination in the given service
914 */
915static int
916ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
917{
918 struct ip_vs_dest *dest;
014d730d
AV
919 __be32 daddr = udest->addr;
920 __be16 dport = udest->port;
1da177e4
LT
921
922 EnterFunction(2);
923
924 if (udest->weight < 0) {
925 IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
926 return -ERANGE;
927 }
928
929 if (udest->l_threshold > udest->u_threshold) {
930 IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than "
931 "upper threshold\n");
932 return -ERANGE;
933 }
934
935 /*
936 * Lookup the destination list
937 */
938 dest = ip_vs_lookup_dest(svc, daddr, dport);
939 if (dest == NULL) {
940 IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
941 return -ENOENT;
942 }
943
944 __ip_vs_update_dest(svc, dest, udest);
945
946 write_lock_bh(&__ip_vs_svc_lock);
947
948 /* Wait until all other svc users go away */
cae7ca3d 949 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1da177e4
LT
950
951 /* call the update_service, because server weight may be changed */
952 svc->scheduler->update_service(svc);
953
954 write_unlock_bh(&__ip_vs_svc_lock);
955
956 LeaveFunction(2);
957
958 return 0;
959}
960
961
962/*
963 * Delete a destination (must be already unlinked from the service)
964 */
965static void __ip_vs_del_dest(struct ip_vs_dest *dest)
966{
967 ip_vs_kill_estimator(&dest->stats);
968
969 /*
970 * Remove it from the d-linked list with the real services.
971 */
972 write_lock_bh(&__ip_vs_rs_lock);
973 ip_vs_rs_unhash(dest);
974 write_unlock_bh(&__ip_vs_rs_lock);
975
976 /*
977 * Decrease the refcnt of the dest, and free the dest
978 * if nobody refers to it (refcnt=0). Otherwise, throw
979 * the destination into the trash.
980 */
981 if (atomic_dec_and_test(&dest->refcnt)) {
982 ip_vs_dst_reset(dest);
983 /* simply decrease svc->refcnt here, let the caller check
984 and release the service if nobody refers to it.
985 Only user context can release destination and service,
986 and only one user context can update virtual service at a
987 time, so the operation here is OK */
988 atomic_dec(&dest->svc->refcnt);
989 kfree(dest);
990 } else {
4b5bdf5c
RN
991 IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, "
992 "dest->refcnt=%d\n",
1da177e4
LT
993 NIPQUAD(dest->addr), ntohs(dest->port),
994 atomic_read(&dest->refcnt));
995 list_add(&dest->n_list, &ip_vs_dest_trash);
996 atomic_inc(&dest->refcnt);
997 }
998}
999
1000
1001/*
1002 * Unlink a destination from the given service
1003 */
1004static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1005 struct ip_vs_dest *dest,
1006 int svcupd)
1007{
1008 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1009
1010 /*
1011 * Remove it from the d-linked destination list.
1012 */
1013 list_del(&dest->n_list);
1014 svc->num_dests--;
1015 if (svcupd) {
1016 /*
1017 * Call the update_service function of its scheduler
1018 */
1019 svc->scheduler->update_service(svc);
1020 }
1021}
1022
1023
1024/*
1025 * Delete a destination server in the given service
1026 */
1027static int
1028ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_dest_user *udest)
1029{
1030 struct ip_vs_dest *dest;
014d730d
AV
1031 __be32 daddr = udest->addr;
1032 __be16 dport = udest->port;
1da177e4
LT
1033
1034 EnterFunction(2);
1035
1036 dest = ip_vs_lookup_dest(svc, daddr, dport);
1037 if (dest == NULL) {
1038 IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
1039 return -ENOENT;
1040 }
1041
1042 write_lock_bh(&__ip_vs_svc_lock);
1043
1044 /*
1045 * Wait until all other svc users go away.
1046 */
1047 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1048
1049 /*
1050 * Unlink dest from the service
1051 */
1052 __ip_vs_unlink_dest(svc, dest, 1);
1053
1054 write_unlock_bh(&__ip_vs_svc_lock);
1055
1056 /*
1057 * Delete the destination
1058 */
1059 __ip_vs_del_dest(dest);
1060
1061 LeaveFunction(2);
1062
1063 return 0;
1064}
1065
1066
1067/*
1068 * Add a service into the service hash table
1069 */
1070static int
1071ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p)
1072{
1073 int ret = 0;
1074 struct ip_vs_scheduler *sched = NULL;
1075 struct ip_vs_service *svc = NULL;
1076
1077 /* increase the module use count */
1078 ip_vs_use_count_inc();
1079
1080 /* Lookup the scheduler by 'u->sched_name' */
1081 sched = ip_vs_scheduler_get(u->sched_name);
1082 if (sched == NULL) {
1083 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1084 u->sched_name);
1085 ret = -ENOENT;
1086 goto out_mod_dec;
1087 }
1088
0da974f4 1089 svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
1da177e4
LT
1090 if (svc == NULL) {
1091 IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
1092 ret = -ENOMEM;
1093 goto out_err;
1094 }
1da177e4
LT
1095
1096 /* I'm the first user of the service */
1097 atomic_set(&svc->usecnt, 1);
1098 atomic_set(&svc->refcnt, 0);
1099
1100 svc->protocol = u->protocol;
1101 svc->addr = u->addr;
1102 svc->port = u->port;
1103 svc->fwmark = u->fwmark;
1104 svc->flags = u->flags;
1105 svc->timeout = u->timeout * HZ;
1106 svc->netmask = u->netmask;
1107
1108 INIT_LIST_HEAD(&svc->destinations);
1109 rwlock_init(&svc->sched_lock);
1110 spin_lock_init(&svc->stats.lock);
1111
1112 /* Bind the scheduler */
1113 ret = ip_vs_bind_scheduler(svc, sched);
1114 if (ret)
1115 goto out_err;
1116 sched = NULL;
1117
1118 /* Update the virtual service counters */
1119 if (svc->port == FTPPORT)
1120 atomic_inc(&ip_vs_ftpsvc_counter);
1121 else if (svc->port == 0)
1122 atomic_inc(&ip_vs_nullsvc_counter);
1123
1124 ip_vs_new_estimator(&svc->stats);
1125 ip_vs_num_services++;
1126
1127 /* Hash the service into the service table */
1128 write_lock_bh(&__ip_vs_svc_lock);
1129 ip_vs_svc_hash(svc);
1130 write_unlock_bh(&__ip_vs_svc_lock);
1131
1132 *svc_p = svc;
1133 return 0;
1134
1135 out_err:
1136 if (svc != NULL) {
1137 if (svc->scheduler)
1138 ip_vs_unbind_scheduler(svc);
1139 if (svc->inc) {
1140 local_bh_disable();
1141 ip_vs_app_inc_put(svc->inc);
1142 local_bh_enable();
1143 }
1144 kfree(svc);
1145 }
1146 ip_vs_scheduler_put(sched);
1147
1148 out_mod_dec:
1149 /* decrease the module use count */
1150 ip_vs_use_count_dec();
1151
1152 return ret;
1153}
1154
1155
1156/*
1157 * Edit a service and bind it with a new scheduler
1158 */
1159static int
1160ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user *u)
1161{
1162 struct ip_vs_scheduler *sched, *old_sched;
1163 int ret = 0;
1164
1165 /*
1166 * Lookup the scheduler, by 'u->sched_name'
1167 */
1168 sched = ip_vs_scheduler_get(u->sched_name);
1169 if (sched == NULL) {
1170 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1171 u->sched_name);
1172 return -ENOENT;
1173 }
1174 old_sched = sched;
1175
1176 write_lock_bh(&__ip_vs_svc_lock);
1177
1178 /*
1179 * Wait until all other svc users go away.
1180 */
1181 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1182
1183 /*
1184 * Set the flags and timeout value
1185 */
1186 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1187 svc->timeout = u->timeout * HZ;
1188 svc->netmask = u->netmask;
1189
1190 old_sched = svc->scheduler;
1191 if (sched != old_sched) {
1192 /*
1193 * Unbind the old scheduler
1194 */
1195 if ((ret = ip_vs_unbind_scheduler(svc))) {
1196 old_sched = sched;
1197 goto out;
1198 }
1199
1200 /*
1201 * Bind the new scheduler
1202 */
1203 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1204 /*
1205 * If ip_vs_bind_scheduler fails, restore the old
1206 * scheduler.
1207 * The main reason of failure is out of memory.
1208 *
1209 * The question is if the old scheduler can be
1210 * restored all the time. TODO: if it cannot be
1211 * restored some time, we must delete the service,
1212 * otherwise the system may crash.
1213 */
1214 ip_vs_bind_scheduler(svc, old_sched);
1215 old_sched = sched;
1216 goto out;
1217 }
1218 }
1219
1220 out:
1221 write_unlock_bh(&__ip_vs_svc_lock);
1222
1223 if (old_sched)
1224 ip_vs_scheduler_put(old_sched);
1225
1226 return ret;
1227}
1228
1229
1230/*
1231 * Delete a service from the service list
1232 * - The service must be unlinked, unlocked and not referenced!
1233 * - We are called under _bh lock
1234 */
1235static void __ip_vs_del_service(struct ip_vs_service *svc)
1236{
1237 struct ip_vs_dest *dest, *nxt;
1238 struct ip_vs_scheduler *old_sched;
1239
1240 ip_vs_num_services--;
1241 ip_vs_kill_estimator(&svc->stats);
1242
1243 /* Unbind scheduler */
1244 old_sched = svc->scheduler;
1245 ip_vs_unbind_scheduler(svc);
1246 if (old_sched)
1247 ip_vs_scheduler_put(old_sched);
1248
1249 /* Unbind app inc */
1250 if (svc->inc) {
1251 ip_vs_app_inc_put(svc->inc);
1252 svc->inc = NULL;
1253 }
1254
1255 /*
1256 * Unlink the whole destination list
1257 */
1258 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1259 __ip_vs_unlink_dest(svc, dest, 0);
1260 __ip_vs_del_dest(dest);
1261 }
1262
1263 /*
1264 * Update the virtual service counters
1265 */
1266 if (svc->port == FTPPORT)
1267 atomic_dec(&ip_vs_ftpsvc_counter);
1268 else if (svc->port == 0)
1269 atomic_dec(&ip_vs_nullsvc_counter);
1270
1271 /*
1272 * Free the service if nobody refers to it
1273 */
1274 if (atomic_read(&svc->refcnt) == 0)
1275 kfree(svc);
1276
1277 /* decrease the module use count */
1278 ip_vs_use_count_dec();
1279}
1280
1281/*
1282 * Delete a service from the service list
1283 */
1284static int ip_vs_del_service(struct ip_vs_service *svc)
1285{
1286 if (svc == NULL)
1287 return -EEXIST;
1288
1289 /*
1290 * Unhash it from the service table
1291 */
1292 write_lock_bh(&__ip_vs_svc_lock);
1293
1294 ip_vs_svc_unhash(svc);
1295
1296 /*
1297 * Wait until all the svc users go away.
1298 */
1299 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1300
1301 __ip_vs_del_service(svc);
1302
1303 write_unlock_bh(&__ip_vs_svc_lock);
1304
1305 return 0;
1306}
1307
1308
1309/*
1310 * Flush all the virtual services
1311 */
1312static int ip_vs_flush(void)
1313{
1314 int idx;
1315 struct ip_vs_service *svc, *nxt;
1316
1317 /*
1318 * Flush the service table hashed by <protocol,addr,port>
1319 */
1320 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1321 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1322 write_lock_bh(&__ip_vs_svc_lock);
1323 ip_vs_svc_unhash(svc);
1324 /*
1325 * Wait until all the svc users go away.
1326 */
1327 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1328 __ip_vs_del_service(svc);
1329 write_unlock_bh(&__ip_vs_svc_lock);
1330 }
1331 }
1332
1333 /*
1334 * Flush the service table hashed by fwmark
1335 */
1336 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1337 list_for_each_entry_safe(svc, nxt,
1338 &ip_vs_svc_fwm_table[idx], f_list) {
1339 write_lock_bh(&__ip_vs_svc_lock);
1340 ip_vs_svc_unhash(svc);
1341 /*
1342 * Wait until all the svc users go away.
1343 */
1344 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1345 __ip_vs_del_service(svc);
1346 write_unlock_bh(&__ip_vs_svc_lock);
1347 }
1348 }
1349
1350 return 0;
1351}
1352
1353
1354/*
1355 * Zero counters in a service or all services
1356 */
1357static int ip_vs_zero_service(struct ip_vs_service *svc)
1358{
1359 struct ip_vs_dest *dest;
1360
1361 write_lock_bh(&__ip_vs_svc_lock);
1362 list_for_each_entry(dest, &svc->destinations, n_list) {
1363 ip_vs_zero_stats(&dest->stats);
1364 }
1365 ip_vs_zero_stats(&svc->stats);
1366 write_unlock_bh(&__ip_vs_svc_lock);
1367 return 0;
1368}
1369
1370static int ip_vs_zero_all(void)
1371{
1372 int idx;
1373 struct ip_vs_service *svc;
1374
1375 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1376 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1377 ip_vs_zero_service(svc);
1378 }
1379 }
1380
1381 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1382 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1383 ip_vs_zero_service(svc);
1384 }
1385 }
1386
1387 ip_vs_zero_stats(&ip_vs_stats);
1388 return 0;
1389}
1390
1391
1392static int
1393proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
1394 void __user *buffer, size_t *lenp, loff_t *ppos)
1395{
1396 int *valp = table->data;
1397 int val = *valp;
1398 int rc;
1399
1400 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1401 if (write && (*valp != val)) {
1402 if ((*valp < 0) || (*valp > 3)) {
1403 /* Restore the correct value */
1404 *valp = val;
1405 } else {
1da177e4 1406 update_defense_level();
1da177e4
LT
1407 }
1408 }
1409 return rc;
1410}
1411
1412
1413static int
1414proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
1415 void __user *buffer, size_t *lenp, loff_t *ppos)
1416{
1417 int *valp = table->data;
1418 int val[2];
1419 int rc;
1420
1421 /* backup the value first */
1422 memcpy(val, valp, sizeof(val));
1423
1424 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1425 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1426 /* Restore the correct value */
1427 memcpy(valp, val, sizeof(val));
1428 }
1429 return rc;
1430}
1431
1432
1433/*
1434 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1435 */
1436
1437static struct ctl_table vs_vars[] = {
1438 {
1da177e4
LT
1439 .procname = "amemthresh",
1440 .data = &sysctl_ip_vs_amemthresh,
1441 .maxlen = sizeof(int),
1442 .mode = 0644,
1443 .proc_handler = &proc_dointvec,
1444 },
1445#ifdef CONFIG_IP_VS_DEBUG
1446 {
1da177e4
LT
1447 .procname = "debug_level",
1448 .data = &sysctl_ip_vs_debug_level,
1449 .maxlen = sizeof(int),
1450 .mode = 0644,
1451 .proc_handler = &proc_dointvec,
1452 },
1453#endif
1454 {
1da177e4
LT
1455 .procname = "am_droprate",
1456 .data = &sysctl_ip_vs_am_droprate,
1457 .maxlen = sizeof(int),
1458 .mode = 0644,
1459 .proc_handler = &proc_dointvec,
1460 },
1461 {
1da177e4
LT
1462 .procname = "drop_entry",
1463 .data = &sysctl_ip_vs_drop_entry,
1464 .maxlen = sizeof(int),
1465 .mode = 0644,
1466 .proc_handler = &proc_do_defense_mode,
1467 },
1468 {
1da177e4
LT
1469 .procname = "drop_packet",
1470 .data = &sysctl_ip_vs_drop_packet,
1471 .maxlen = sizeof(int),
1472 .mode = 0644,
1473 .proc_handler = &proc_do_defense_mode,
1474 },
1475 {
1da177e4
LT
1476 .procname = "secure_tcp",
1477 .data = &sysctl_ip_vs_secure_tcp,
1478 .maxlen = sizeof(int),
1479 .mode = 0644,
1480 .proc_handler = &proc_do_defense_mode,
1481 },
1482#if 0
1483 {
1da177e4
LT
1484 .procname = "timeout_established",
1485 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1486 .maxlen = sizeof(int),
1487 .mode = 0644,
1488 .proc_handler = &proc_dointvec_jiffies,
1489 },
1490 {
1da177e4
LT
1491 .procname = "timeout_synsent",
1492 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1493 .maxlen = sizeof(int),
1494 .mode = 0644,
1495 .proc_handler = &proc_dointvec_jiffies,
1496 },
1497 {
1da177e4
LT
1498 .procname = "timeout_synrecv",
1499 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1500 .maxlen = sizeof(int),
1501 .mode = 0644,
1502 .proc_handler = &proc_dointvec_jiffies,
1503 },
1504 {
1da177e4
LT
1505 .procname = "timeout_finwait",
1506 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1507 .maxlen = sizeof(int),
1508 .mode = 0644,
1509 .proc_handler = &proc_dointvec_jiffies,
1510 },
1511 {
1da177e4
LT
1512 .procname = "timeout_timewait",
1513 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1514 .maxlen = sizeof(int),
1515 .mode = 0644,
1516 .proc_handler = &proc_dointvec_jiffies,
1517 },
1518 {
1da177e4
LT
1519 .procname = "timeout_close",
1520 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1521 .maxlen = sizeof(int),
1522 .mode = 0644,
1523 .proc_handler = &proc_dointvec_jiffies,
1524 },
1525 {
1da177e4
LT
1526 .procname = "timeout_closewait",
1527 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1528 .maxlen = sizeof(int),
1529 .mode = 0644,
1530 .proc_handler = &proc_dointvec_jiffies,
1531 },
1532 {
1da177e4
LT
1533 .procname = "timeout_lastack",
1534 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1535 .maxlen = sizeof(int),
1536 .mode = 0644,
1537 .proc_handler = &proc_dointvec_jiffies,
1538 },
1539 {
1da177e4
LT
1540 .procname = "timeout_listen",
1541 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1542 .maxlen = sizeof(int),
1543 .mode = 0644,
1544 .proc_handler = &proc_dointvec_jiffies,
1545 },
1546 {
1da177e4
LT
1547 .procname = "timeout_synack",
1548 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1549 .maxlen = sizeof(int),
1550 .mode = 0644,
1551 .proc_handler = &proc_dointvec_jiffies,
1552 },
1553 {
1da177e4
LT
1554 .procname = "timeout_udp",
1555 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1556 .maxlen = sizeof(int),
1557 .mode = 0644,
1558 .proc_handler = &proc_dointvec_jiffies,
1559 },
1560 {
1da177e4
LT
1561 .procname = "timeout_icmp",
1562 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1563 .maxlen = sizeof(int),
1564 .mode = 0644,
1565 .proc_handler = &proc_dointvec_jiffies,
1566 },
1567#endif
1568 {
1da177e4
LT
1569 .procname = "cache_bypass",
1570 .data = &sysctl_ip_vs_cache_bypass,
1571 .maxlen = sizeof(int),
1572 .mode = 0644,
1573 .proc_handler = &proc_dointvec,
1574 },
1575 {
1da177e4
LT
1576 .procname = "expire_nodest_conn",
1577 .data = &sysctl_ip_vs_expire_nodest_conn,
1578 .maxlen = sizeof(int),
1579 .mode = 0644,
1580 .proc_handler = &proc_dointvec,
1581 },
1582 {
1da177e4
LT
1583 .procname = "expire_quiescent_template",
1584 .data = &sysctl_ip_vs_expire_quiescent_template,
1585 .maxlen = sizeof(int),
1586 .mode = 0644,
1587 .proc_handler = &proc_dointvec,
1588 },
1589 {
1da177e4
LT
1590 .procname = "sync_threshold",
1591 .data = &sysctl_ip_vs_sync_threshold,
1592 .maxlen = sizeof(sysctl_ip_vs_sync_threshold),
1593 .mode = 0644,
1594 .proc_handler = &proc_do_sync_threshold,
1595 },
1596 {
1da177e4
LT
1597 .procname = "nat_icmp_send",
1598 .data = &sysctl_ip_vs_nat_icmp_send,
1599 .maxlen = sizeof(int),
1600 .mode = 0644,
1601 .proc_handler = &proc_dointvec,
1602 },
1603 { .ctl_name = 0 }
1604};
1605
5587da55 1606const struct ctl_path net_vs_ctl_path[] = {
90754f8e
PE
1607 { .procname = "net", .ctl_name = CTL_NET, },
1608 { .procname = "ipv4", .ctl_name = NET_IPV4, },
1609 { .procname = "vs", },
1610 { }
1da177e4 1611};
90754f8e 1612EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1da177e4
LT
1613
1614static struct ctl_table_header * sysctl_header;
1615
1616#ifdef CONFIG_PROC_FS
1617
/* Private seq_file cursor used while walking the service tables. */
struct ip_vs_iter {
	struct list_head *table;	/* table currently walked
					 * (ip_vs_svc_table or
					 * ip_vs_svc_fwm_table) */
	int bucket;			/* hash bucket of the current entry */
};
1622
1623/*
1624 * Write the contents of the VS rule table to a PROCfs file.
1625 * (It is kept just for backward compatibility)
1626 */
1627static inline const char *ip_vs_fwd_name(unsigned flags)
1628{
1629 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1630 case IP_VS_CONN_F_LOCALNODE:
1631 return "Local";
1632 case IP_VS_CONN_F_TUNNEL:
1633 return "Tunnel";
1634 case IP_VS_CONN_F_DROUTE:
1635 return "Route";
1636 default:
1637 return "Masq";
1638 }
1639}
1640
1641
1642/* Get the Nth entry in the two lists */
1643static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1644{
1645 struct ip_vs_iter *iter = seq->private;
1646 int idx;
1647 struct ip_vs_service *svc;
1648
1649 /* look in hash by protocol */
1650 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1651 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1652 if (pos-- == 0){
1653 iter->table = ip_vs_svc_table;
1654 iter->bucket = idx;
1655 return svc;
1656 }
1657 }
1658 }
1659
1660 /* keep looking in fwmark */
1661 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1662 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1663 if (pos-- == 0) {
1664 iter->table = ip_vs_svc_fwm_table;
1665 iter->bucket = idx;
1666 return svc;
1667 }
1668 }
1669 }
1670
1671 return NULL;
1672}
1673
1674static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1675{
1676
1677 read_lock_bh(&__ip_vs_svc_lock);
1678 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1679}
1680
1681
1682static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1683{
1684 struct list_head *e;
1685 struct ip_vs_iter *iter;
1686 struct ip_vs_service *svc;
1687
1688 ++*pos;
1689 if (v == SEQ_START_TOKEN)
1690 return ip_vs_info_array(seq,0);
1691
1692 svc = v;
1693 iter = seq->private;
1694
1695 if (iter->table == ip_vs_svc_table) {
1696 /* next service in table hashed by protocol */
1697 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1698 return list_entry(e, struct ip_vs_service, s_list);
1699
1700
1701 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1702 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1703 s_list) {
1704 return svc;
1705 }
1706 }
1707
1708 iter->table = ip_vs_svc_fwm_table;
1709 iter->bucket = -1;
1710 goto scan_fwmark;
1711 }
1712
1713 /* next service in hashed by fwmark */
1714 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1715 return list_entry(e, struct ip_vs_service, f_list);
1716
1717 scan_fwmark:
1718 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1719 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1720 f_list)
1721 return svc;
1722 }
1723
1724 return NULL;
1725}
1726
1727static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1728{
1729 read_unlock_bh(&__ip_vs_svc_lock);
1730}
1731
1732
1733static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1734{
1735 if (v == SEQ_START_TOKEN) {
1736 seq_printf(seq,
1737 "IP Virtual Server version %d.%d.%d (size=%d)\n",
1738 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
1739 seq_puts(seq,
1740 "Prot LocalAddress:Port Scheduler Flags\n");
1741 seq_puts(seq,
1742 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1743 } else {
1744 const struct ip_vs_service *svc = v;
1745 const struct ip_vs_iter *iter = seq->private;
1746 const struct ip_vs_dest *dest;
1747
1748 if (iter->table == ip_vs_svc_table)
1749 seq_printf(seq, "%s %08X:%04X %s ",
1750 ip_vs_proto_name(svc->protocol),
1751 ntohl(svc->addr),
1752 ntohs(svc->port),
1753 svc->scheduler->name);
1754 else
1755 seq_printf(seq, "FWM %08X %s ",
1756 svc->fwmark, svc->scheduler->name);
1757
1758 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1759 seq_printf(seq, "persistent %d %08X\n",
1760 svc->timeout,
1761 ntohl(svc->netmask));
1762 else
1763 seq_putc(seq, '\n');
1764
1765 list_for_each_entry(dest, &svc->destinations, n_list) {
1766 seq_printf(seq,
1767 " -> %08X:%04X %-7s %-6d %-10d %-10d\n",
1768 ntohl(dest->addr), ntohs(dest->port),
1769 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1770 atomic_read(&dest->weight),
1771 atomic_read(&dest->activeconns),
1772 atomic_read(&dest->inactconns));
1773 }
1774 }
1775 return 0;
1776}
1777
56b3d975 1778static const struct seq_operations ip_vs_info_seq_ops = {
1da177e4
LT
1779 .start = ip_vs_info_seq_start,
1780 .next = ip_vs_info_seq_next,
1781 .stop = ip_vs_info_seq_stop,
1782 .show = ip_vs_info_seq_show,
1783};
1784
1785static int ip_vs_info_open(struct inode *inode, struct file *file)
1786{
cf7732e4
PE
1787 return seq_open_private(file, &ip_vs_info_seq_ops,
1788 sizeof(struct ip_vs_iter));
1da177e4
LT
1789}
1790
9a32144e 1791static const struct file_operations ip_vs_info_fops = {
1da177e4
LT
1792 .owner = THIS_MODULE,
1793 .open = ip_vs_info_open,
1794 .read = seq_read,
1795 .llseek = seq_lseek,
1796 .release = seq_release_private,
1797};
1798
1799#endif
1800
519e49e8
SW
1801struct ip_vs_stats ip_vs_stats = {
1802 .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
1803};
1da177e4
LT
1804
1805#ifdef CONFIG_PROC_FS
1806static int ip_vs_stats_show(struct seq_file *seq, void *v)
1807{
1808
1809/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1810 seq_puts(seq,
1811 " Total Incoming Outgoing Incoming Outgoing\n");
1812 seq_printf(seq,
1813 " Conns Packets Packets Bytes Bytes\n");
1814
1815 spin_lock_bh(&ip_vs_stats.lock);
1816 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.conns,
1817 ip_vs_stats.inpkts, ip_vs_stats.outpkts,
1818 (unsigned long long) ip_vs_stats.inbytes,
1819 (unsigned long long) ip_vs_stats.outbytes);
1820
1821/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1822 seq_puts(seq,
1823 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1824 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
1825 ip_vs_stats.cps,
1826 ip_vs_stats.inpps,
1827 ip_vs_stats.outpps,
1828 ip_vs_stats.inbps,
1829 ip_vs_stats.outbps);
1830 spin_unlock_bh(&ip_vs_stats.lock);
1831
1832 return 0;
1833}
1834
1835static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1836{
1837 return single_open(file, ip_vs_stats_show, NULL);
1838}
1839
9a32144e 1840static const struct file_operations ip_vs_stats_fops = {
1da177e4
LT
1841 .owner = THIS_MODULE,
1842 .open = ip_vs_stats_seq_open,
1843 .read = seq_read,
1844 .llseek = seq_lseek,
1845 .release = single_release,
1846};
1847
1848#endif
1849
1850/*
1851 * Set timeout values for tcp tcpfin udp in the timeout_table.
1852 */
1853static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
1854{
1855 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
1856 u->tcp_timeout,
1857 u->tcp_fin_timeout,
1858 u->udp_timeout);
1859
1860#ifdef CONFIG_IP_VS_PROTO_TCP
1861 if (u->tcp_timeout) {
1862 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
1863 = u->tcp_timeout * HZ;
1864 }
1865
1866 if (u->tcp_fin_timeout) {
1867 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
1868 = u->tcp_fin_timeout * HZ;
1869 }
1870#endif
1871
1872#ifdef CONFIG_IP_VS_PROTO_UDP
1873 if (u->udp_timeout) {
1874 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
1875 = u->udp_timeout * HZ;
1876 }
1877#endif
1878 return 0;
1879}
1880
1881
1882#define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
1883#define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user))
1884#define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \
1885 sizeof(struct ip_vs_dest_user))
1886#define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
1887#define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user))
1888#define MAX_ARG_LEN SVCDEST_ARG_LEN
1889
9b5b5cff 1890static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
1da177e4
LT
1891 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
1892 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
1893 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
1894 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
1895 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
1896 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
1897 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
1898 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
1899 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
1900 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
1901 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
1902};
1903
1904static int
1905do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1906{
1907 int ret;
1908 unsigned char arg[MAX_ARG_LEN];
1909 struct ip_vs_service_user *usvc;
1910 struct ip_vs_service *svc;
1911 struct ip_vs_dest_user *udest;
1912
1913 if (!capable(CAP_NET_ADMIN))
1914 return -EPERM;
1915
1916 if (len != set_arglen[SET_CMDID(cmd)]) {
1917 IP_VS_ERR("set_ctl: len %u != %u\n",
1918 len, set_arglen[SET_CMDID(cmd)]);
1919 return -EINVAL;
1920 }
1921
1922 if (copy_from_user(arg, user, len) != 0)
1923 return -EFAULT;
1924
1925 /* increase the module use count */
1926 ip_vs_use_count_inc();
1927
14cc3e2b 1928 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
1da177e4
LT
1929 ret = -ERESTARTSYS;
1930 goto out_dec;
1931 }
1932
1933 if (cmd == IP_VS_SO_SET_FLUSH) {
1934 /* Flush the virtual service */
1935 ret = ip_vs_flush();
1936 goto out_unlock;
1937 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
1938 /* Set timeout values for (tcp tcpfin udp) */
1939 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
1940 goto out_unlock;
1941 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
1942 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
1943 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
1944 goto out_unlock;
1945 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
1946 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
1947 ret = stop_sync_thread(dm->state);
1948 goto out_unlock;
1949 }
1950
1951 usvc = (struct ip_vs_service_user *)arg;
1952 udest = (struct ip_vs_dest_user *)(usvc + 1);
1953
1954 if (cmd == IP_VS_SO_SET_ZERO) {
1955 /* if no service address is set, zero counters in all */
1956 if (!usvc->fwmark && !usvc->addr && !usvc->port) {
1957 ret = ip_vs_zero_all();
1958 goto out_unlock;
1959 }
1960 }
1961
1962 /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
1963 if (usvc->protocol!=IPPROTO_TCP && usvc->protocol!=IPPROTO_UDP) {
1964 IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
1965 usvc->protocol, NIPQUAD(usvc->addr),
1966 ntohs(usvc->port), usvc->sched_name);
1967 ret = -EFAULT;
1968 goto out_unlock;
1969 }
1970
1971 /* Lookup the exact service by <protocol, addr, port> or fwmark */
1972 if (usvc->fwmark == 0)
1973 svc = __ip_vs_service_get(usvc->protocol,
1974 usvc->addr, usvc->port);
1975 else
1976 svc = __ip_vs_svc_fwm_get(usvc->fwmark);
1977
1978 if (cmd != IP_VS_SO_SET_ADD
1979 && (svc == NULL || svc->protocol != usvc->protocol)) {
1980 ret = -ESRCH;
1981 goto out_unlock;
1982 }
1983
1984 switch (cmd) {
1985 case IP_VS_SO_SET_ADD:
1986 if (svc != NULL)
1987 ret = -EEXIST;
1988 else
1989 ret = ip_vs_add_service(usvc, &svc);
1990 break;
1991 case IP_VS_SO_SET_EDIT:
1992 ret = ip_vs_edit_service(svc, usvc);
1993 break;
1994 case IP_VS_SO_SET_DEL:
1995 ret = ip_vs_del_service(svc);
1996 if (!ret)
1997 goto out_unlock;
1998 break;
1999 case IP_VS_SO_SET_ZERO:
2000 ret = ip_vs_zero_service(svc);
2001 break;
2002 case IP_VS_SO_SET_ADDDEST:
2003 ret = ip_vs_add_dest(svc, udest);
2004 break;
2005 case IP_VS_SO_SET_EDITDEST:
2006 ret = ip_vs_edit_dest(svc, udest);
2007 break;
2008 case IP_VS_SO_SET_DELDEST:
2009 ret = ip_vs_del_dest(svc, udest);
2010 break;
2011 default:
2012 ret = -EINVAL;
2013 }
2014
2015 if (svc)
2016 ip_vs_service_put(svc);
2017
2018 out_unlock:
14cc3e2b 2019 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2020 out_dec:
2021 /* decrease the module use count */
2022 ip_vs_use_count_dec();
2023
2024 return ret;
2025}
2026
2027
2028static void
2029ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2030{
2031 spin_lock_bh(&src->lock);
2032 memcpy(dst, src, (char*)&src->lock - (char*)src);
2033 spin_unlock_bh(&src->lock);
2034}
2035
2036static void
2037ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2038{
2039 dst->protocol = src->protocol;
2040 dst->addr = src->addr;
2041 dst->port = src->port;
2042 dst->fwmark = src->fwmark;
4da62fc7 2043 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
1da177e4
LT
2044 dst->flags = src->flags;
2045 dst->timeout = src->timeout / HZ;
2046 dst->netmask = src->netmask;
2047 dst->num_dests = src->num_dests;
2048 ip_vs_copy_stats(&dst->stats, &src->stats);
2049}
2050
2051static inline int
2052__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2053 struct ip_vs_get_services __user *uptr)
2054{
2055 int idx, count=0;
2056 struct ip_vs_service *svc;
2057 struct ip_vs_service_entry entry;
2058 int ret = 0;
2059
2060 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2061 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2062 if (count >= get->num_services)
2063 goto out;
4da62fc7 2064 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2065 ip_vs_copy_service(&entry, svc);
2066 if (copy_to_user(&uptr->entrytable[count],
2067 &entry, sizeof(entry))) {
2068 ret = -EFAULT;
2069 goto out;
2070 }
2071 count++;
2072 }
2073 }
2074
2075 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2076 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2077 if (count >= get->num_services)
2078 goto out;
4da62fc7 2079 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2080 ip_vs_copy_service(&entry, svc);
2081 if (copy_to_user(&uptr->entrytable[count],
2082 &entry, sizeof(entry))) {
2083 ret = -EFAULT;
2084 goto out;
2085 }
2086 count++;
2087 }
2088 }
2089 out:
2090 return ret;
2091}
2092
2093static inline int
2094__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2095 struct ip_vs_get_dests __user *uptr)
2096{
2097 struct ip_vs_service *svc;
2098 int ret = 0;
2099
2100 if (get->fwmark)
2101 svc = __ip_vs_svc_fwm_get(get->fwmark);
2102 else
2103 svc = __ip_vs_service_get(get->protocol,
2104 get->addr, get->port);
2105 if (svc) {
2106 int count = 0;
2107 struct ip_vs_dest *dest;
2108 struct ip_vs_dest_entry entry;
2109
2110 list_for_each_entry(dest, &svc->destinations, n_list) {
2111 if (count >= get->num_dests)
2112 break;
2113
2114 entry.addr = dest->addr;
2115 entry.port = dest->port;
2116 entry.conn_flags = atomic_read(&dest->conn_flags);
2117 entry.weight = atomic_read(&dest->weight);
2118 entry.u_threshold = dest->u_threshold;
2119 entry.l_threshold = dest->l_threshold;
2120 entry.activeconns = atomic_read(&dest->activeconns);
2121 entry.inactconns = atomic_read(&dest->inactconns);
2122 entry.persistconns = atomic_read(&dest->persistconns);
2123 ip_vs_copy_stats(&entry.stats, &dest->stats);
2124 if (copy_to_user(&uptr->entrytable[count],
2125 &entry, sizeof(entry))) {
2126 ret = -EFAULT;
2127 break;
2128 }
2129 count++;
2130 }
2131 ip_vs_service_put(svc);
2132 } else
2133 ret = -ESRCH;
2134 return ret;
2135}
2136
2137static inline void
2138__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2139{
2140#ifdef CONFIG_IP_VS_PROTO_TCP
2141 u->tcp_timeout =
2142 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2143 u->tcp_fin_timeout =
2144 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2145#endif
2146#ifdef CONFIG_IP_VS_PROTO_UDP
2147 u->udp_timeout =
2148 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2149#endif
2150}
2151
2152
2153#define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2154#define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2155#define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2156#define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2157#define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2158#define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2159#define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2160
9b5b5cff 2161static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
1da177e4
LT
2162 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2163 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2164 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2165 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2166 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2167 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2168 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2169};
2170
2171static int
2172do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2173{
2174 unsigned char arg[128];
2175 int ret = 0;
2176
2177 if (!capable(CAP_NET_ADMIN))
2178 return -EPERM;
2179
2180 if (*len < get_arglen[GET_CMDID(cmd)]) {
2181 IP_VS_ERR("get_ctl: len %u < %u\n",
2182 *len, get_arglen[GET_CMDID(cmd)]);
2183 return -EINVAL;
2184 }
2185
2186 if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0)
2187 return -EFAULT;
2188
14cc3e2b 2189 if (mutex_lock_interruptible(&__ip_vs_mutex))
1da177e4
LT
2190 return -ERESTARTSYS;
2191
2192 switch (cmd) {
2193 case IP_VS_SO_GET_VERSION:
2194 {
2195 char buf[64];
2196
2197 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2198 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
2199 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2200 ret = -EFAULT;
2201 goto out;
2202 }
2203 *len = strlen(buf)+1;
2204 }
2205 break;
2206
2207 case IP_VS_SO_GET_INFO:
2208 {
2209 struct ip_vs_getinfo info;
2210 info.version = IP_VS_VERSION_CODE;
2211 info.size = IP_VS_CONN_TAB_SIZE;
2212 info.num_services = ip_vs_num_services;
2213 if (copy_to_user(user, &info, sizeof(info)) != 0)
2214 ret = -EFAULT;
2215 }
2216 break;
2217
2218 case IP_VS_SO_GET_SERVICES:
2219 {
2220 struct ip_vs_get_services *get;
2221 int size;
2222
2223 get = (struct ip_vs_get_services *)arg;
2224 size = sizeof(*get) +
2225 sizeof(struct ip_vs_service_entry) * get->num_services;
2226 if (*len != size) {
2227 IP_VS_ERR("length: %u != %u\n", *len, size);
2228 ret = -EINVAL;
2229 goto out;
2230 }
2231 ret = __ip_vs_get_service_entries(get, user);
2232 }
2233 break;
2234
2235 case IP_VS_SO_GET_SERVICE:
2236 {
2237 struct ip_vs_service_entry *entry;
2238 struct ip_vs_service *svc;
2239
2240 entry = (struct ip_vs_service_entry *)arg;
2241 if (entry->fwmark)
2242 svc = __ip_vs_svc_fwm_get(entry->fwmark);
2243 else
2244 svc = __ip_vs_service_get(entry->protocol,
2245 entry->addr, entry->port);
2246 if (svc) {
2247 ip_vs_copy_service(entry, svc);
2248 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2249 ret = -EFAULT;
2250 ip_vs_service_put(svc);
2251 } else
2252 ret = -ESRCH;
2253 }
2254 break;
2255
2256 case IP_VS_SO_GET_DESTS:
2257 {
2258 struct ip_vs_get_dests *get;
2259 int size;
2260
2261 get = (struct ip_vs_get_dests *)arg;
2262 size = sizeof(*get) +
2263 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2264 if (*len != size) {
2265 IP_VS_ERR("length: %u != %u\n", *len, size);
2266 ret = -EINVAL;
2267 goto out;
2268 }
2269 ret = __ip_vs_get_dest_entries(get, user);
2270 }
2271 break;
2272
2273 case IP_VS_SO_GET_TIMEOUT:
2274 {
2275 struct ip_vs_timeout_user t;
2276
2277 __ip_vs_get_timeouts(&t);
2278 if (copy_to_user(user, &t, sizeof(t)) != 0)
2279 ret = -EFAULT;
2280 }
2281 break;
2282
2283 case IP_VS_SO_GET_DAEMON:
2284 {
2285 struct ip_vs_daemon_user d[2];
2286
2287 memset(&d, 0, sizeof(d));
2288 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2289 d[0].state = IP_VS_STATE_MASTER;
4da62fc7 2290 strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
1da177e4
LT
2291 d[0].syncid = ip_vs_master_syncid;
2292 }
2293 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2294 d[1].state = IP_VS_STATE_BACKUP;
4da62fc7 2295 strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
1da177e4
LT
2296 d[1].syncid = ip_vs_backup_syncid;
2297 }
2298 if (copy_to_user(user, &d, sizeof(d)) != 0)
2299 ret = -EFAULT;
2300 }
2301 break;
2302
2303 default:
2304 ret = -EINVAL;
2305 }
2306
2307 out:
14cc3e2b 2308 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2309 return ret;
2310}
2311
2312
2313static struct nf_sockopt_ops ip_vs_sockopts = {
2314 .pf = PF_INET,
2315 .set_optmin = IP_VS_BASE_CTL,
2316 .set_optmax = IP_VS_SO_SET_MAX+1,
2317 .set = do_ip_vs_set_ctl,
2318 .get_optmin = IP_VS_BASE_CTL,
2319 .get_optmax = IP_VS_SO_GET_MAX+1,
2320 .get = do_ip_vs_get_ctl,
16fcec35 2321 .owner = THIS_MODULE,
1da177e4
LT
2322};
2323
9a812198
JV
2324/*
2325 * Generic Netlink interface
2326 */
2327
2328/* IPVS genetlink family */
2329static struct genl_family ip_vs_genl_family = {
2330 .id = GENL_ID_GENERATE,
2331 .hdrsize = 0,
2332 .name = IPVS_GENL_NAME,
2333 .version = IPVS_GENL_VERSION,
2334 .maxattr = IPVS_CMD_MAX,
2335};
2336
2337/* Policy used for first-level command attributes */
2338static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2339 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2340 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2341 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2342 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2343 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2344 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2345};
2346
2347/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2348static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2349 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2350 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2351 .len = IP_VS_IFNAME_MAXLEN },
2352 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2353};
2354
2355/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2356static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2357 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2358 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2359 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2360 .len = sizeof(union nf_inet_addr) },
2361 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2362 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2363 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2364 .len = IP_VS_SCHEDNAME_MAXLEN },
2365 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2366 .len = sizeof(struct ip_vs_flags) },
2367 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2368 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2369 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2370};
2371
2372/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2373static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2374 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2375 .len = sizeof(union nf_inet_addr) },
2376 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2377 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2378 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2379 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2380 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2381 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2382 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2383 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2384 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
2385};
2386
2387static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2388 struct ip_vs_stats *stats)
2389{
2390 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2391 if (!nl_stats)
2392 return -EMSGSIZE;
2393
2394 spin_lock_bh(&stats->lock);
2395
2396 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->conns);
2397 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->inpkts);
2398 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->outpkts);
2399 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->inbytes);
2400 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->outbytes);
2401 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->cps);
2402 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->inpps);
2403 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->outpps);
2404 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->inbps);
2405 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->outbps);
2406
2407 spin_unlock_bh(&stats->lock);
2408
2409 nla_nest_end(skb, nl_stats);
2410
2411 return 0;
2412
2413nla_put_failure:
2414 spin_unlock_bh(&stats->lock);
2415 nla_nest_cancel(skb, nl_stats);
2416 return -EMSGSIZE;
2417}
2418
2419static int ip_vs_genl_fill_service(struct sk_buff *skb,
2420 struct ip_vs_service *svc)
2421{
2422 struct nlattr *nl_service;
2423 struct ip_vs_flags flags = { .flags = svc->flags,
2424 .mask = ~0 };
2425
2426 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2427 if (!nl_service)
2428 return -EMSGSIZE;
2429
2430 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, AF_INET);
2431
2432 if (svc->fwmark) {
2433 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2434 } else {
2435 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2436 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2437 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2438 }
2439
2440 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2441 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2442 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2443 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2444
2445 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2446 goto nla_put_failure;
2447
2448 nla_nest_end(skb, nl_service);
2449
2450 return 0;
2451
2452nla_put_failure:
2453 nla_nest_cancel(skb, nl_service);
2454 return -EMSGSIZE;
2455}
2456
2457static int ip_vs_genl_dump_service(struct sk_buff *skb,
2458 struct ip_vs_service *svc,
2459 struct netlink_callback *cb)
2460{
2461 void *hdr;
2462
2463 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2464 &ip_vs_genl_family, NLM_F_MULTI,
2465 IPVS_CMD_NEW_SERVICE);
2466 if (!hdr)
2467 return -EMSGSIZE;
2468
2469 if (ip_vs_genl_fill_service(skb, svc) < 0)
2470 goto nla_put_failure;
2471
2472 return genlmsg_end(skb, hdr);
2473
2474nla_put_failure:
2475 genlmsg_cancel(skb, hdr);
2476 return -EMSGSIZE;
2477}
2478
2479static int ip_vs_genl_dump_services(struct sk_buff *skb,
2480 struct netlink_callback *cb)
2481{
2482 int idx = 0, i;
2483 int start = cb->args[0];
2484 struct ip_vs_service *svc;
2485
2486 mutex_lock(&__ip_vs_mutex);
2487 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2488 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2489 if (++idx <= start)
2490 continue;
2491 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2492 idx--;
2493 goto nla_put_failure;
2494 }
2495 }
2496 }
2497
2498 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2499 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2500 if (++idx <= start)
2501 continue;
2502 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2503 idx--;
2504 goto nla_put_failure;
2505 }
2506 }
2507 }
2508
2509nla_put_failure:
2510 mutex_unlock(&__ip_vs_mutex);
2511 cb->args[0] = idx;
2512
2513 return skb->len;
2514}
2515
2516static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc,
2517 struct nlattr *nla, int full_entry)
2518{
2519 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2520 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2521
2522 /* Parse mandatory identifying service fields first */
2523 if (nla == NULL ||
2524 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2525 return -EINVAL;
2526
2527 nla_af = attrs[IPVS_SVC_ATTR_AF];
2528 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2529 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2530 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2531 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2532
2533 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2534 return -EINVAL;
2535
2536 /* For now, only support IPv4 */
2537 if (nla_get_u16(nla_af) != AF_INET)
2538 return -EAFNOSUPPORT;
2539
2540 if (nla_fwmark) {
2541 usvc->protocol = IPPROTO_TCP;
2542 usvc->fwmark = nla_get_u32(nla_fwmark);
2543 } else {
2544 usvc->protocol = nla_get_u16(nla_protocol);
2545 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2546 usvc->port = nla_get_u16(nla_port);
2547 usvc->fwmark = 0;
2548 }
2549
2550 /* If a full entry was requested, check for the additional fields */
2551 if (full_entry) {
2552 struct nlattr *nla_sched, *nla_flags, *nla_timeout,
2553 *nla_netmask;
2554 struct ip_vs_flags flags;
2555 struct ip_vs_service *svc;
2556
2557 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2558 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2559 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2560 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2561
2562 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2563 return -EINVAL;
2564
2565 nla_memcpy(&flags, nla_flags, sizeof(flags));
2566
2567 /* prefill flags from service if it already exists */
2568 if (usvc->fwmark)
2569 svc = __ip_vs_svc_fwm_get(usvc->fwmark);
2570 else
2571 svc = __ip_vs_service_get(usvc->protocol, usvc->addr,
2572 usvc->port);
2573 if (svc) {
2574 usvc->flags = svc->flags;
2575 ip_vs_service_put(svc);
2576 } else
2577 usvc->flags = 0;
2578
2579 /* set new flags from userland */
2580 usvc->flags = (usvc->flags & ~flags.mask) |
2581 (flags.flags & flags.mask);
2582
2583 strlcpy(usvc->sched_name, nla_data(nla_sched),
2584 sizeof(usvc->sched_name));
2585 usvc->timeout = nla_get_u32(nla_timeout);
2586 usvc->netmask = nla_get_u32(nla_netmask);
2587 }
2588
2589 return 0;
2590}
2591
2592static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2593{
2594 struct ip_vs_service_user usvc;
2595 int ret;
2596
2597 ret = ip_vs_genl_parse_service(&usvc, nla, 0);
2598 if (ret)
2599 return ERR_PTR(ret);
2600
2601 if (usvc.fwmark)
2602 return __ip_vs_svc_fwm_get(usvc.fwmark);
2603 else
2604 return __ip_vs_service_get(usvc.protocol, usvc.addr,
2605 usvc.port);
2606}
2607
2608static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2609{
2610 struct nlattr *nl_dest;
2611
2612 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2613 if (!nl_dest)
2614 return -EMSGSIZE;
2615
2616 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2617 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2618
2619 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2620 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2621 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2622 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2623 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2624 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2625 atomic_read(&dest->activeconns));
2626 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2627 atomic_read(&dest->inactconns));
2628 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2629 atomic_read(&dest->persistconns));
2630
2631 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2632 goto nla_put_failure;
2633
2634 nla_nest_end(skb, nl_dest);
2635
2636 return 0;
2637
2638nla_put_failure:
2639 nla_nest_cancel(skb, nl_dest);
2640 return -EMSGSIZE;
2641}
2642
2643static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2644 struct netlink_callback *cb)
2645{
2646 void *hdr;
2647
2648 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2649 &ip_vs_genl_family, NLM_F_MULTI,
2650 IPVS_CMD_NEW_DEST);
2651 if (!hdr)
2652 return -EMSGSIZE;
2653
2654 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2655 goto nla_put_failure;
2656
2657 return genlmsg_end(skb, hdr);
2658
2659nla_put_failure:
2660 genlmsg_cancel(skb, hdr);
2661 return -EMSGSIZE;
2662}
2663
2664static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2665 struct netlink_callback *cb)
2666{
2667 int idx = 0;
2668 int start = cb->args[0];
2669 struct ip_vs_service *svc;
2670 struct ip_vs_dest *dest;
2671 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2672
2673 mutex_lock(&__ip_vs_mutex);
2674
2675 /* Try to find the service for which to dump destinations */
2676 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2677 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2678 goto out_err;
2679
2680 svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
2681 if (IS_ERR(svc) || svc == NULL)
2682 goto out_err;
2683
2684 /* Dump the destinations */
2685 list_for_each_entry(dest, &svc->destinations, n_list) {
2686 if (++idx <= start)
2687 continue;
2688 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2689 idx--;
2690 goto nla_put_failure;
2691 }
2692 }
2693
2694nla_put_failure:
2695 cb->args[0] = idx;
2696 ip_vs_service_put(svc);
2697
2698out_err:
2699 mutex_unlock(&__ip_vs_mutex);
2700
2701 return skb->len;
2702}
2703
2704static int ip_vs_genl_parse_dest(struct ip_vs_dest_user *udest,
2705 struct nlattr *nla, int full_entry)
2706{
2707 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2708 struct nlattr *nla_addr, *nla_port;
2709
2710 /* Parse mandatory identifying destination fields first */
2711 if (nla == NULL ||
2712 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2713 return -EINVAL;
2714
2715 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
2716 nla_port = attrs[IPVS_DEST_ATTR_PORT];
2717
2718 if (!(nla_addr && nla_port))
2719 return -EINVAL;
2720
2721 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2722 udest->port = nla_get_u16(nla_port);
2723
2724 /* If a full entry was requested, check for the additional fields */
2725 if (full_entry) {
2726 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2727 *nla_l_thresh;
2728
2729 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
2730 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
2731 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
2732 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
2733
2734 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
2735 return -EINVAL;
2736
2737 udest->conn_flags = nla_get_u32(nla_fwd)
2738 & IP_VS_CONN_F_FWD_MASK;
2739 udest->weight = nla_get_u32(nla_weight);
2740 udest->u_threshold = nla_get_u32(nla_u_thresh);
2741 udest->l_threshold = nla_get_u32(nla_l_thresh);
2742 }
2743
2744 return 0;
2745}
2746
2747static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
2748 const char *mcast_ifn, __be32 syncid)
2749{
2750 struct nlattr *nl_daemon;
2751
2752 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
2753 if (!nl_daemon)
2754 return -EMSGSIZE;
2755
2756 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
2757 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
2758 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
2759
2760 nla_nest_end(skb, nl_daemon);
2761
2762 return 0;
2763
2764nla_put_failure:
2765 nla_nest_cancel(skb, nl_daemon);
2766 return -EMSGSIZE;
2767}
2768
2769static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
2770 const char *mcast_ifn, __be32 syncid,
2771 struct netlink_callback *cb)
2772{
2773 void *hdr;
2774 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2775 &ip_vs_genl_family, NLM_F_MULTI,
2776 IPVS_CMD_NEW_DAEMON);
2777 if (!hdr)
2778 return -EMSGSIZE;
2779
2780 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
2781 goto nla_put_failure;
2782
2783 return genlmsg_end(skb, hdr);
2784
2785nla_put_failure:
2786 genlmsg_cancel(skb, hdr);
2787 return -EMSGSIZE;
2788}
2789
2790static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
2791 struct netlink_callback *cb)
2792{
2793 mutex_lock(&__ip_vs_mutex);
2794 if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
2795 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
2796 ip_vs_master_mcast_ifn,
2797 ip_vs_master_syncid, cb) < 0)
2798 goto nla_put_failure;
2799
2800 cb->args[0] = 1;
2801 }
2802
2803 if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
2804 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
2805 ip_vs_backup_mcast_ifn,
2806 ip_vs_backup_syncid, cb) < 0)
2807 goto nla_put_failure;
2808
2809 cb->args[1] = 1;
2810 }
2811
2812nla_put_failure:
2813 mutex_unlock(&__ip_vs_mutex);
2814
2815 return skb->len;
2816}
2817
2818static int ip_vs_genl_new_daemon(struct nlattr **attrs)
2819{
2820 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
2821 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
2822 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
2823 return -EINVAL;
2824
2825 return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
2826 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
2827 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
2828}
2829
2830static int ip_vs_genl_del_daemon(struct nlattr **attrs)
2831{
2832 if (!attrs[IPVS_DAEMON_ATTR_STATE])
2833 return -EINVAL;
2834
2835 return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
2836}
2837
2838static int ip_vs_genl_set_config(struct nlattr **attrs)
2839{
2840 struct ip_vs_timeout_user t;
2841
2842 __ip_vs_get_timeouts(&t);
2843
2844 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
2845 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
2846
2847 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
2848 t.tcp_fin_timeout =
2849 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
2850
2851 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
2852 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
2853
2854 return ip_vs_set_timeout(&t);
2855}
2856
2857static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
2858{
2859 struct ip_vs_service *svc = NULL;
2860 struct ip_vs_service_user usvc;
2861 struct ip_vs_dest_user udest;
2862 int ret = 0, cmd;
2863 int need_full_svc = 0, need_full_dest = 0;
2864
2865 cmd = info->genlhdr->cmd;
2866
2867 mutex_lock(&__ip_vs_mutex);
2868
2869 if (cmd == IPVS_CMD_FLUSH) {
2870 ret = ip_vs_flush();
2871 goto out;
2872 } else if (cmd == IPVS_CMD_SET_CONFIG) {
2873 ret = ip_vs_genl_set_config(info->attrs);
2874 goto out;
2875 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
2876 cmd == IPVS_CMD_DEL_DAEMON) {
2877
2878 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
2879
2880 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
2881 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
2882 info->attrs[IPVS_CMD_ATTR_DAEMON],
2883 ip_vs_daemon_policy)) {
2884 ret = -EINVAL;
2885 goto out;
2886 }
2887
2888 if (cmd == IPVS_CMD_NEW_DAEMON)
2889 ret = ip_vs_genl_new_daemon(daemon_attrs);
2890 else
2891 ret = ip_vs_genl_del_daemon(daemon_attrs);
2892 goto out;
2893 } else if (cmd == IPVS_CMD_ZERO &&
2894 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
2895 ret = ip_vs_zero_all();
2896 goto out;
2897 }
2898
2899 /* All following commands require a service argument, so check if we
2900 * received a valid one. We need a full service specification when
2901 * adding / editing a service. Only identifying members otherwise. */
2902 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
2903 need_full_svc = 1;
2904
2905 ret = ip_vs_genl_parse_service(&usvc,
2906 info->attrs[IPVS_CMD_ATTR_SERVICE],
2907 need_full_svc);
2908 if (ret)
2909 goto out;
2910
2911 /* Lookup the exact service by <protocol, addr, port> or fwmark */
2912 if (usvc.fwmark == 0)
2913 svc = __ip_vs_service_get(usvc.protocol, usvc.addr, usvc.port);
2914 else
2915 svc = __ip_vs_svc_fwm_get(usvc.fwmark);
2916
2917 /* Unless we're adding a new service, the service must already exist */
2918 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
2919 ret = -ESRCH;
2920 goto out;
2921 }
2922
2923 /* Destination commands require a valid destination argument. For
2924 * adding / editing a destination, we need a full destination
2925 * specification. */
2926 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
2927 cmd == IPVS_CMD_DEL_DEST) {
2928 if (cmd != IPVS_CMD_DEL_DEST)
2929 need_full_dest = 1;
2930
2931 ret = ip_vs_genl_parse_dest(&udest,
2932 info->attrs[IPVS_CMD_ATTR_DEST],
2933 need_full_dest);
2934 if (ret)
2935 goto out;
2936 }
2937
2938 switch (cmd) {
2939 case IPVS_CMD_NEW_SERVICE:
2940 if (svc == NULL)
2941 ret = ip_vs_add_service(&usvc, &svc);
2942 else
2943 ret = -EEXIST;
2944 break;
2945 case IPVS_CMD_SET_SERVICE:
2946 ret = ip_vs_edit_service(svc, &usvc);
2947 break;
2948 case IPVS_CMD_DEL_SERVICE:
2949 ret = ip_vs_del_service(svc);
2950 break;
2951 case IPVS_CMD_NEW_DEST:
2952 ret = ip_vs_add_dest(svc, &udest);
2953 break;
2954 case IPVS_CMD_SET_DEST:
2955 ret = ip_vs_edit_dest(svc, &udest);
2956 break;
2957 case IPVS_CMD_DEL_DEST:
2958 ret = ip_vs_del_dest(svc, &udest);
2959 break;
2960 case IPVS_CMD_ZERO:
2961 ret = ip_vs_zero_service(svc);
2962 break;
2963 default:
2964 ret = -EINVAL;
2965 }
2966
2967out:
2968 if (svc)
2969 ip_vs_service_put(svc);
2970 mutex_unlock(&__ip_vs_mutex);
2971
2972 return ret;
2973}
2974
2975static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
2976{
2977 struct sk_buff *msg;
2978 void *reply;
2979 int ret, cmd, reply_cmd;
2980
2981 cmd = info->genlhdr->cmd;
2982
2983 if (cmd == IPVS_CMD_GET_SERVICE)
2984 reply_cmd = IPVS_CMD_NEW_SERVICE;
2985 else if (cmd == IPVS_CMD_GET_INFO)
2986 reply_cmd = IPVS_CMD_SET_INFO;
2987 else if (cmd == IPVS_CMD_GET_CONFIG)
2988 reply_cmd = IPVS_CMD_SET_CONFIG;
2989 else {
2990 IP_VS_ERR("unknown Generic Netlink command\n");
2991 return -EINVAL;
2992 }
2993
2994 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2995 if (!msg)
2996 return -ENOMEM;
2997
2998 mutex_lock(&__ip_vs_mutex);
2999
3000 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3001 if (reply == NULL)
3002 goto nla_put_failure;
3003
3004 switch (cmd) {
3005 case IPVS_CMD_GET_SERVICE:
3006 {
3007 struct ip_vs_service *svc;
3008
3009 svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
3010 if (IS_ERR(svc)) {
3011 ret = PTR_ERR(svc);
3012 goto out_err;
3013 } else if (svc) {
3014 ret = ip_vs_genl_fill_service(msg, svc);
3015 ip_vs_service_put(svc);
3016 if (ret)
3017 goto nla_put_failure;
3018 } else {
3019 ret = -ESRCH;
3020 goto out_err;
3021 }
3022
3023 break;
3024 }
3025
3026 case IPVS_CMD_GET_CONFIG:
3027 {
3028 struct ip_vs_timeout_user t;
3029
3030 __ip_vs_get_timeouts(&t);
3031#ifdef CONFIG_IP_VS_PROTO_TCP
3032 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3033 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3034 t.tcp_fin_timeout);
3035#endif
3036#ifdef CONFIG_IP_VS_PROTO_UDP
3037 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3038#endif
3039
3040 break;
3041 }
3042
3043 case IPVS_CMD_GET_INFO:
3044 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3045 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3046 IP_VS_CONN_TAB_SIZE);
3047 break;
3048 }
3049
3050 genlmsg_end(msg, reply);
3051 ret = genlmsg_unicast(msg, info->snd_pid);
3052 goto out;
3053
3054nla_put_failure:
3055 IP_VS_ERR("not enough space in Netlink message\n");
3056 ret = -EMSGSIZE;
3057
3058out_err:
3059 nlmsg_free(msg);
3060out:
3061 mutex_unlock(&__ip_vs_mutex);
3062
3063 return ret;
3064}
3065
3066
3067static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3068 {
3069 .cmd = IPVS_CMD_NEW_SERVICE,
3070 .flags = GENL_ADMIN_PERM,
3071 .policy = ip_vs_cmd_policy,
3072 .doit = ip_vs_genl_set_cmd,
3073 },
3074 {
3075 .cmd = IPVS_CMD_SET_SERVICE,
3076 .flags = GENL_ADMIN_PERM,
3077 .policy = ip_vs_cmd_policy,
3078 .doit = ip_vs_genl_set_cmd,
3079 },
3080 {
3081 .cmd = IPVS_CMD_DEL_SERVICE,
3082 .flags = GENL_ADMIN_PERM,
3083 .policy = ip_vs_cmd_policy,
3084 .doit = ip_vs_genl_set_cmd,
3085 },
3086 {
3087 .cmd = IPVS_CMD_GET_SERVICE,
3088 .flags = GENL_ADMIN_PERM,
3089 .doit = ip_vs_genl_get_cmd,
3090 .dumpit = ip_vs_genl_dump_services,
3091 .policy = ip_vs_cmd_policy,
3092 },
3093 {
3094 .cmd = IPVS_CMD_NEW_DEST,
3095 .flags = GENL_ADMIN_PERM,
3096 .policy = ip_vs_cmd_policy,
3097 .doit = ip_vs_genl_set_cmd,
3098 },
3099 {
3100 .cmd = IPVS_CMD_SET_DEST,
3101 .flags = GENL_ADMIN_PERM,
3102 .policy = ip_vs_cmd_policy,
3103 .doit = ip_vs_genl_set_cmd,
3104 },
3105 {
3106 .cmd = IPVS_CMD_DEL_DEST,
3107 .flags = GENL_ADMIN_PERM,
3108 .policy = ip_vs_cmd_policy,
3109 .doit = ip_vs_genl_set_cmd,
3110 },
3111 {
3112 .cmd = IPVS_CMD_GET_DEST,
3113 .flags = GENL_ADMIN_PERM,
3114 .policy = ip_vs_cmd_policy,
3115 .dumpit = ip_vs_genl_dump_dests,
3116 },
3117 {
3118 .cmd = IPVS_CMD_NEW_DAEMON,
3119 .flags = GENL_ADMIN_PERM,
3120 .policy = ip_vs_cmd_policy,
3121 .doit = ip_vs_genl_set_cmd,
3122 },
3123 {
3124 .cmd = IPVS_CMD_DEL_DAEMON,
3125 .flags = GENL_ADMIN_PERM,
3126 .policy = ip_vs_cmd_policy,
3127 .doit = ip_vs_genl_set_cmd,
3128 },
3129 {
3130 .cmd = IPVS_CMD_GET_DAEMON,
3131 .flags = GENL_ADMIN_PERM,
3132 .dumpit = ip_vs_genl_dump_daemons,
3133 },
3134 {
3135 .cmd = IPVS_CMD_SET_CONFIG,
3136 .flags = GENL_ADMIN_PERM,
3137 .policy = ip_vs_cmd_policy,
3138 .doit = ip_vs_genl_set_cmd,
3139 },
3140 {
3141 .cmd = IPVS_CMD_GET_CONFIG,
3142 .flags = GENL_ADMIN_PERM,
3143 .doit = ip_vs_genl_get_cmd,
3144 },
3145 {
3146 .cmd = IPVS_CMD_GET_INFO,
3147 .flags = GENL_ADMIN_PERM,
3148 .doit = ip_vs_genl_get_cmd,
3149 },
3150 {
3151 .cmd = IPVS_CMD_ZERO,
3152 .flags = GENL_ADMIN_PERM,
3153 .policy = ip_vs_cmd_policy,
3154 .doit = ip_vs_genl_set_cmd,
3155 },
3156 {
3157 .cmd = IPVS_CMD_FLUSH,
3158 .flags = GENL_ADMIN_PERM,
3159 .doit = ip_vs_genl_set_cmd,
3160 },
3161};
3162
3163static int __init ip_vs_genl_register(void)
3164{
3165 int ret, i;
3166
3167 ret = genl_register_family(&ip_vs_genl_family);
3168 if (ret)
3169 return ret;
3170
3171 for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
3172 ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
3173 if (ret)
3174 goto err_out;
3175 }
3176 return 0;
3177
3178err_out:
3179 genl_unregister_family(&ip_vs_genl_family);
3180 return ret;
3181}
3182
3183static void ip_vs_genl_unregister(void)
3184{
3185 genl_unregister_family(&ip_vs_genl_family);
3186}
3187
3188/* End of Generic Netlink interface definitions */
3189
1da177e4 3190
048cf48b 3191int __init ip_vs_control_init(void)
1da177e4
LT
3192{
3193 int ret;
3194 int idx;
3195
3196 EnterFunction(2);
3197
3198 ret = nf_register_sockopt(&ip_vs_sockopts);
3199 if (ret) {
3200 IP_VS_ERR("cannot register sockopt.\n");
3201 return ret;
3202 }
3203
9a812198
JV
3204 ret = ip_vs_genl_register();
3205 if (ret) {
3206 IP_VS_ERR("cannot register Generic Netlink interface.\n");
3207 nf_unregister_sockopt(&ip_vs_sockopts);
3208 return ret;
3209 }
3210
457c4cbc
EB
3211 proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
3212 proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
1da177e4 3213
90754f8e 3214 sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
1da177e4
LT
3215
3216 /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3217 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3218 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3219 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3220 }
3221 for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) {
3222 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3223 }
3224
1da177e4
LT
3225 ip_vs_new_estimator(&ip_vs_stats);
3226
3227 /* Hook the defense timer */
3228 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3229
3230 LeaveFunction(2);
3231 return 0;
3232}
3233
3234
3235void ip_vs_control_cleanup(void)
3236{
3237 EnterFunction(2);
3238 ip_vs_trash_cleanup();
3239 cancel_rearming_delayed_work(&defense_work);
28e53bdd 3240 cancel_work_sync(&defense_work.work);
1da177e4
LT
3241 ip_vs_kill_estimator(&ip_vs_stats);
3242 unregister_sysctl_table(sysctl_header);
457c4cbc
EB
3243 proc_net_remove(&init_net, "ip_vs_stats");
3244 proc_net_remove(&init_net, "ip_vs");
9a812198 3245 ip_vs_genl_unregister();
1da177e4
LT
3246 nf_unregister_sockopt(&ip_vs_sockopts);
3247 LeaveFunction(2);
3248}