]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/ipvs/ip_vs_ctl.c
IPVS: Add internal versions of sockopt interface structs
[net-next-2.6.git] / net / ipv4 / ipvs / ip_vs_ctl.c
CommitLineData
1da177e4
LT
1/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
1da177e4
LT
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
21#include <linux/module.h>
22#include <linux/init.h>
23#include <linux/types.h>
4fc268d2 24#include <linux/capability.h>
1da177e4
LT
25#include <linux/fs.h>
26#include <linux/sysctl.h>
27#include <linux/proc_fs.h>
28#include <linux/workqueue.h>
29#include <linux/swap.h>
1da177e4
LT
30#include <linux/seq_file.h>
31
32#include <linux/netfilter.h>
33#include <linux/netfilter_ipv4.h>
14cc3e2b 34#include <linux/mutex.h>
1da177e4 35
457c4cbc 36#include <net/net_namespace.h>
1da177e4 37#include <net/ip.h>
14c85021 38#include <net/route.h>
1da177e4 39#include <net/sock.h>
9a812198 40#include <net/genetlink.h>
1da177e4
LT
41
42#include <asm/uaccess.h>
43
44#include <net/ip_vs.h>
45
46/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
14cc3e2b 47static DEFINE_MUTEX(__ip_vs_mutex);
1da177e4
LT
48
49/* lock for service table */
50static DEFINE_RWLOCK(__ip_vs_svc_lock);
51
52/* lock for table with the real services */
53static DEFINE_RWLOCK(__ip_vs_rs_lock);
54
55/* lock for state and timeout tables */
56static DEFINE_RWLOCK(__ip_vs_securetcp_lock);
57
58/* lock for drop entry handling */
59static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
60
61/* lock for drop packet handling */
62static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
63
64/* 1/rate drop and drop-entry variables */
65int ip_vs_drop_rate = 0;
66int ip_vs_drop_counter = 0;
67static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
68
69/* number of virtual services */
70static int ip_vs_num_services = 0;
71
72/* sysctl variables */
73static int sysctl_ip_vs_drop_entry = 0;
74static int sysctl_ip_vs_drop_packet = 0;
75static int sysctl_ip_vs_secure_tcp = 0;
76static int sysctl_ip_vs_amemthresh = 1024;
77static int sysctl_ip_vs_am_droprate = 10;
78int sysctl_ip_vs_cache_bypass = 0;
79int sysctl_ip_vs_expire_nodest_conn = 0;
80int sysctl_ip_vs_expire_quiescent_template = 0;
81int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
82int sysctl_ip_vs_nat_icmp_send = 0;
83
84
#ifdef CONFIG_IP_VS_DEBUG
/* debug verbosity; starts at zero */
static int sysctl_ip_vs_debug_level;

/*
 *	Return the current value of the IPVS debug level variable.
 */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
#endif
93
94/*
af9debd4
JA
95 * update_defense_level is called from keventd and from sysctl,
96 * so it needs to protect itself from softirqs
1da177e4
LT
97 */
98static void update_defense_level(void)
99{
100 struct sysinfo i;
101 static int old_secure_tcp = 0;
102 int availmem;
103 int nomem;
104 int to_change = -1;
105
106 /* we only count free and buffered memory (in pages) */
107 si_meminfo(&i);
108 availmem = i.freeram + i.bufferram;
109 /* however in linux 2.5 the i.bufferram is total page cache size,
110 we need adjust it */
111 /* si_swapinfo(&i); */
112 /* availmem = availmem - (i.totalswap - i.freeswap); */
113
114 nomem = (availmem < sysctl_ip_vs_amemthresh);
115
af9debd4
JA
116 local_bh_disable();
117
1da177e4
LT
118 /* drop_entry */
119 spin_lock(&__ip_vs_dropentry_lock);
120 switch (sysctl_ip_vs_drop_entry) {
121 case 0:
122 atomic_set(&ip_vs_dropentry, 0);
123 break;
124 case 1:
125 if (nomem) {
126 atomic_set(&ip_vs_dropentry, 1);
127 sysctl_ip_vs_drop_entry = 2;
128 } else {
129 atomic_set(&ip_vs_dropentry, 0);
130 }
131 break;
132 case 2:
133 if (nomem) {
134 atomic_set(&ip_vs_dropentry, 1);
135 } else {
136 atomic_set(&ip_vs_dropentry, 0);
137 sysctl_ip_vs_drop_entry = 1;
138 };
139 break;
140 case 3:
141 atomic_set(&ip_vs_dropentry, 1);
142 break;
143 }
144 spin_unlock(&__ip_vs_dropentry_lock);
145
146 /* drop_packet */
147 spin_lock(&__ip_vs_droppacket_lock);
148 switch (sysctl_ip_vs_drop_packet) {
149 case 0:
150 ip_vs_drop_rate = 0;
151 break;
152 case 1:
153 if (nomem) {
154 ip_vs_drop_rate = ip_vs_drop_counter
155 = sysctl_ip_vs_amemthresh /
156 (sysctl_ip_vs_amemthresh-availmem);
157 sysctl_ip_vs_drop_packet = 2;
158 } else {
159 ip_vs_drop_rate = 0;
160 }
161 break;
162 case 2:
163 if (nomem) {
164 ip_vs_drop_rate = ip_vs_drop_counter
165 = sysctl_ip_vs_amemthresh /
166 (sysctl_ip_vs_amemthresh-availmem);
167 } else {
168 ip_vs_drop_rate = 0;
169 sysctl_ip_vs_drop_packet = 1;
170 }
171 break;
172 case 3:
173 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
174 break;
175 }
176 spin_unlock(&__ip_vs_droppacket_lock);
177
178 /* secure_tcp */
179 write_lock(&__ip_vs_securetcp_lock);
180 switch (sysctl_ip_vs_secure_tcp) {
181 case 0:
182 if (old_secure_tcp >= 2)
183 to_change = 0;
184 break;
185 case 1:
186 if (nomem) {
187 if (old_secure_tcp < 2)
188 to_change = 1;
189 sysctl_ip_vs_secure_tcp = 2;
190 } else {
191 if (old_secure_tcp >= 2)
192 to_change = 0;
193 }
194 break;
195 case 2:
196 if (nomem) {
197 if (old_secure_tcp < 2)
198 to_change = 1;
199 } else {
200 if (old_secure_tcp >= 2)
201 to_change = 0;
202 sysctl_ip_vs_secure_tcp = 1;
203 }
204 break;
205 case 3:
206 if (old_secure_tcp < 2)
207 to_change = 1;
208 break;
209 }
210 old_secure_tcp = sysctl_ip_vs_secure_tcp;
211 if (to_change >= 0)
212 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
213 write_unlock(&__ip_vs_securetcp_lock);
af9debd4
JA
214
215 local_bh_enable();
1da177e4
LT
216}
217
218
219/*
220 * Timer for checking the defense
221 */
222#define DEFENSE_TIMER_PERIOD 1*HZ
c4028958
DH
223static void defense_work_handler(struct work_struct *work);
224static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
1da177e4 225
c4028958 226static void defense_work_handler(struct work_struct *work)
1da177e4
LT
227{
228 update_defense_level();
229 if (atomic_read(&ip_vs_dropentry))
230 ip_vs_random_dropentry();
231
232 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
233}
234
235int
236ip_vs_use_count_inc(void)
237{
238 return try_module_get(THIS_MODULE);
239}
240
241void
242ip_vs_use_count_dec(void)
243{
244 module_put(THIS_MODULE);
245}
246
247
248/*
249 * Hash table: for virtual service lookups
250 */
251#define IP_VS_SVC_TAB_BITS 8
252#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
253#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
254
255/* the service table hashed by <protocol, addr, port> */
256static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
257/* the service table hashed by fwmark */
258static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
259
260/*
261 * Hash table: for real service lookups
262 */
263#define IP_VS_RTAB_BITS 4
264#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
265#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
266
267static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
268
269/*
270 * Trash for destinations
271 */
272static LIST_HEAD(ip_vs_dest_trash);
273
274/*
275 * FTP & NULL virtual service counters
276 */
277static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
278static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
279
280
281/*
282 * Returns hash value for virtual service
283 */
284static __inline__ unsigned
014d730d 285ip_vs_svc_hashkey(unsigned proto, __be32 addr, __be16 port)
1da177e4
LT
286{
287 register unsigned porth = ntohs(port);
288
289 return (proto^ntohl(addr)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
290 & IP_VS_SVC_TAB_MASK;
291}
292
293/*
294 * Returns hash value of fwmark for virtual service lookup
295 */
296static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
297{
298 return fwmark & IP_VS_SVC_TAB_MASK;
299}
300
301/*
302 * Hashes a service in the ip_vs_svc_table by <proto,addr,port>
303 * or in the ip_vs_svc_fwm_table by fwmark.
304 * Should be called with locked tables.
305 */
306static int ip_vs_svc_hash(struct ip_vs_service *svc)
307{
308 unsigned hash;
309
310 if (svc->flags & IP_VS_SVC_F_HASHED) {
311 IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
312 "called from %p\n", __builtin_return_address(0));
313 return 0;
314 }
315
316 if (svc->fwmark == 0) {
317 /*
318 * Hash it by <protocol,addr,port> in ip_vs_svc_table
319 */
e7ade46a
JV
320 hash = ip_vs_svc_hashkey(svc->protocol, svc->addr.ip,
321 svc->port);
1da177e4
LT
322 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
323 } else {
324 /*
325 * Hash it by fwmark in ip_vs_svc_fwm_table
326 */
327 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
328 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
329 }
330
331 svc->flags |= IP_VS_SVC_F_HASHED;
332 /* increase its refcnt because it is referenced by the svc table */
333 atomic_inc(&svc->refcnt);
334 return 1;
335}
336
337
338/*
339 * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
340 * Should be called with locked tables.
341 */
342static int ip_vs_svc_unhash(struct ip_vs_service *svc)
343{
344 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
345 IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
346 "called from %p\n", __builtin_return_address(0));
347 return 0;
348 }
349
350 if (svc->fwmark == 0) {
351 /* Remove it from the ip_vs_svc_table table */
352 list_del(&svc->s_list);
353 } else {
354 /* Remove it from the ip_vs_svc_fwm_table table */
355 list_del(&svc->f_list);
356 }
357
358 svc->flags &= ~IP_VS_SVC_F_HASHED;
359 atomic_dec(&svc->refcnt);
360 return 1;
361}
362
363
364/*
365 * Get service by {proto,addr,port} in the service table.
366 */
367static __inline__ struct ip_vs_service *
014d730d 368__ip_vs_service_get(__u16 protocol, __be32 vaddr, __be16 vport)
1da177e4
LT
369{
370 unsigned hash;
371 struct ip_vs_service *svc;
372
373 /* Check for "full" addressed entries */
374 hash = ip_vs_svc_hashkey(protocol, vaddr, vport);
375
376 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
e7ade46a 377 if ((svc->addr.ip == vaddr)
1da177e4
LT
378 && (svc->port == vport)
379 && (svc->protocol == protocol)) {
380 /* HIT */
381 atomic_inc(&svc->usecnt);
382 return svc;
383 }
384 }
385
386 return NULL;
387}
388
389
390/*
391 * Get service by {fwmark} in the service table.
392 */
393static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark)
394{
395 unsigned hash;
396 struct ip_vs_service *svc;
397
398 /* Check for fwmark addressed entries */
399 hash = ip_vs_svc_fwm_hashkey(fwmark);
400
401 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
402 if (svc->fwmark == fwmark) {
403 /* HIT */
404 atomic_inc(&svc->usecnt);
405 return svc;
406 }
407 }
408
409 return NULL;
410}
411
412struct ip_vs_service *
014d730d 413ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport)
1da177e4
LT
414{
415 struct ip_vs_service *svc;
416
417 read_lock(&__ip_vs_svc_lock);
418
419 /*
420 * Check the table hashed by fwmark first
421 */
422 if (fwmark && (svc = __ip_vs_svc_fwm_get(fwmark)))
423 goto out;
424
425 /*
426 * Check the table hashed by <protocol,addr,port>
427 * for "full" addressed entries
428 */
429 svc = __ip_vs_service_get(protocol, vaddr, vport);
430
431 if (svc == NULL
432 && protocol == IPPROTO_TCP
433 && atomic_read(&ip_vs_ftpsvc_counter)
434 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
435 /*
436 * Check if ftp service entry exists, the packet
437 * might belong to FTP data connections.
438 */
439 svc = __ip_vs_service_get(protocol, vaddr, FTPPORT);
440 }
441
442 if (svc == NULL
443 && atomic_read(&ip_vs_nullsvc_counter)) {
444 /*
445 * Check if the catch-all port (port zero) exists
446 */
447 svc = __ip_vs_service_get(protocol, vaddr, 0);
448 }
449
450 out:
451 read_unlock(&__ip_vs_svc_lock);
452
4b5bdf5c 453 IP_VS_DBG(9, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n",
1da177e4
LT
454 fwmark, ip_vs_proto_name(protocol),
455 NIPQUAD(vaddr), ntohs(vport),
456 svc?"hit":"not hit");
457
458 return svc;
459}
460
461
462static inline void
463__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
464{
465 atomic_inc(&svc->refcnt);
466 dest->svc = svc;
467}
468
469static inline void
470__ip_vs_unbind_svc(struct ip_vs_dest *dest)
471{
472 struct ip_vs_service *svc = dest->svc;
473
474 dest->svc = NULL;
475 if (atomic_dec_and_test(&svc->refcnt))
476 kfree(svc);
477}
478
479
480/*
481 * Returns hash value for real service
482 */
014d730d 483static __inline__ unsigned ip_vs_rs_hashkey(__be32 addr, __be16 port)
1da177e4
LT
484{
485 register unsigned porth = ntohs(port);
486
487 return (ntohl(addr)^(porth>>IP_VS_RTAB_BITS)^porth)
488 & IP_VS_RTAB_MASK;
489}
490
491/*
492 * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
493 * should be called with locked tables.
494 */
495static int ip_vs_rs_hash(struct ip_vs_dest *dest)
496{
497 unsigned hash;
498
499 if (!list_empty(&dest->d_list)) {
500 return 0;
501 }
502
503 /*
504 * Hash by proto,addr,port,
505 * which are the parameters of the real service.
506 */
e7ade46a 507 hash = ip_vs_rs_hashkey(dest->addr.ip, dest->port);
1da177e4
LT
508 list_add(&dest->d_list, &ip_vs_rtable[hash]);
509
510 return 1;
511}
512
513/*
514 * UNhashes ip_vs_dest from ip_vs_rtable.
515 * should be called with locked tables.
516 */
517static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
518{
519 /*
520 * Remove it from the ip_vs_rtable table.
521 */
522 if (!list_empty(&dest->d_list)) {
523 list_del(&dest->d_list);
524 INIT_LIST_HEAD(&dest->d_list);
525 }
526
527 return 1;
528}
529
530/*
531 * Lookup real service by <proto,addr,port> in the real service table.
532 */
533struct ip_vs_dest *
014d730d 534ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport)
1da177e4
LT
535{
536 unsigned hash;
537 struct ip_vs_dest *dest;
538
539 /*
540 * Check for "full" addressed entries
541 * Return the first found entry
542 */
543 hash = ip_vs_rs_hashkey(daddr, dport);
544
545 read_lock(&__ip_vs_rs_lock);
546 list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
e7ade46a 547 if ((dest->addr.ip == daddr)
1da177e4
LT
548 && (dest->port == dport)
549 && ((dest->protocol == protocol) ||
550 dest->vfwmark)) {
551 /* HIT */
552 read_unlock(&__ip_vs_rs_lock);
553 return dest;
554 }
555 }
556 read_unlock(&__ip_vs_rs_lock);
557
558 return NULL;
559}
560
561/*
562 * Lookup destination by {addr,port} in the given service
563 */
564static struct ip_vs_dest *
014d730d 565ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
1da177e4
LT
566{
567 struct ip_vs_dest *dest;
568
569 /*
570 * Find the destination for the given service
571 */
572 list_for_each_entry(dest, &svc->destinations, n_list) {
e7ade46a 573 if ((dest->addr.ip == daddr) && (dest->port == dport)) {
1da177e4
LT
574 /* HIT */
575 return dest;
576 }
577 }
578
579 return NULL;
580}
581
1e356f9c
RB
582/*
583 * Find destination by {daddr,dport,vaddr,protocol}
584 * Cretaed to be used in ip_vs_process_message() in
585 * the backup synchronization daemon. It finds the
586 * destination to be bound to the received connection
587 * on the backup.
588 *
589 * ip_vs_lookup_real_service() looked promissing, but
590 * seems not working as expected.
591 */
592struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport,
593 __be32 vaddr, __be16 vport, __u16 protocol)
594{
595 struct ip_vs_dest *dest;
596 struct ip_vs_service *svc;
597
598 svc = ip_vs_service_get(0, protocol, vaddr, vport);
599 if (!svc)
600 return NULL;
601 dest = ip_vs_lookup_dest(svc, daddr, dport);
602 if (dest)
603 atomic_inc(&dest->refcnt);
604 ip_vs_service_put(svc);
605 return dest;
606}
1da177e4
LT
607
608/*
609 * Lookup dest by {svc,addr,port} in the destination trash.
610 * The destination trash is used to hold the destinations that are removed
611 * from the service table but are still referenced by some conn entries.
612 * The reason to add the destination trash is when the dest is temporary
613 * down (either by administrator or by monitor program), the dest can be
614 * picked back from the trash, the remaining connections to the dest can
615 * continue, and the counting information of the dest is also useful for
616 * scheduling.
617 */
618static struct ip_vs_dest *
014d730d 619ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
1da177e4
LT
620{
621 struct ip_vs_dest *dest, *nxt;
622
623 /*
624 * Find the destination in trash
625 */
626 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
627 IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, "
4b5bdf5c 628 "dest->refcnt=%d\n",
1da177e4 629 dest->vfwmark,
e7ade46a 630 NIPQUAD(dest->addr.ip), ntohs(dest->port),
1da177e4 631 atomic_read(&dest->refcnt));
e7ade46a 632 if (dest->addr.ip == daddr &&
1da177e4
LT
633 dest->port == dport &&
634 dest->vfwmark == svc->fwmark &&
635 dest->protocol == svc->protocol &&
636 (svc->fwmark ||
e7ade46a 637 (dest->vaddr.ip == svc->addr.ip &&
1da177e4
LT
638 dest->vport == svc->port))) {
639 /* HIT */
640 return dest;
641 }
642
643 /*
644 * Try to purge the destination from trash if not referenced
645 */
646 if (atomic_read(&dest->refcnt) == 1) {
647 IP_VS_DBG(3, "Removing destination %u/%u.%u.%u.%u:%u "
648 "from trash\n",
649 dest->vfwmark,
e7ade46a 650 NIPQUAD(dest->addr.ip), ntohs(dest->port));
1da177e4
LT
651 list_del(&dest->n_list);
652 ip_vs_dst_reset(dest);
653 __ip_vs_unbind_svc(dest);
654 kfree(dest);
655 }
656 }
657
658 return NULL;
659}
660
661
662/*
663 * Clean up all the destinations in the trash
664 * Called by the ip_vs_control_cleanup()
665 *
666 * When the ip_vs_control_clearup is activated by ipvs module exit,
667 * the service tables must have been flushed and all the connections
668 * are expired, and the refcnt of each destination in the trash must
669 * be 1, so we simply release them here.
670 */
671static void ip_vs_trash_cleanup(void)
672{
673 struct ip_vs_dest *dest, *nxt;
674
675 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
676 list_del(&dest->n_list);
677 ip_vs_dst_reset(dest);
678 __ip_vs_unbind_svc(dest);
679 kfree(dest);
680 }
681}
682
683
684static void
685ip_vs_zero_stats(struct ip_vs_stats *stats)
686{
687 spin_lock_bh(&stats->lock);
e93615d0
SH
688
689 stats->conns = 0;
690 stats->inpkts = 0;
691 stats->outpkts = 0;
692 stats->inbytes = 0;
693 stats->outbytes = 0;
694
695 stats->cps = 0;
696 stats->inpps = 0;
697 stats->outpps = 0;
698 stats->inbps = 0;
699 stats->outbps = 0;
700
1da177e4 701 ip_vs_zero_estimator(stats);
e93615d0 702
3a14a313 703 spin_unlock_bh(&stats->lock);
1da177e4
LT
704}
705
706/*
707 * Update a destination in the given service
708 */
709static void
710__ip_vs_update_dest(struct ip_vs_service *svc,
c860c6b1 711 struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
712{
713 int conn_flags;
714
715 /* set the weight and the flags */
716 atomic_set(&dest->weight, udest->weight);
717 conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
718
719 /* check if local node and update the flags */
c860c6b1 720 if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
1da177e4
LT
721 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
722 | IP_VS_CONN_F_LOCALNODE;
723 }
724
725 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
726 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
727 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
728 } else {
729 /*
730 * Put the real service in ip_vs_rtable if not present.
731 * For now only for NAT!
732 */
733 write_lock_bh(&__ip_vs_rs_lock);
734 ip_vs_rs_hash(dest);
735 write_unlock_bh(&__ip_vs_rs_lock);
736 }
737 atomic_set(&dest->conn_flags, conn_flags);
738
739 /* bind the service */
740 if (!dest->svc) {
741 __ip_vs_bind_svc(dest, svc);
742 } else {
743 if (dest->svc != svc) {
744 __ip_vs_unbind_svc(dest);
745 ip_vs_zero_stats(&dest->stats);
746 __ip_vs_bind_svc(dest, svc);
747 }
748 }
749
750 /* set the dest status flags */
751 dest->flags |= IP_VS_DEST_F_AVAILABLE;
752
753 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
754 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
755 dest->u_threshold = udest->u_threshold;
756 dest->l_threshold = udest->l_threshold;
757}
758
759
760/*
761 * Create a destination for the given service
762 */
763static int
c860c6b1 764ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
1da177e4
LT
765 struct ip_vs_dest **dest_p)
766{
767 struct ip_vs_dest *dest;
768 unsigned atype;
769
770 EnterFunction(2);
771
c860c6b1 772 atype = inet_addr_type(&init_net, udest->addr.ip);
1da177e4
LT
773 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
774 return -EINVAL;
775
0da974f4 776 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
1da177e4
LT
777 if (dest == NULL) {
778 IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
779 return -ENOMEM;
780 }
1da177e4 781
c860c6b1 782 dest->af = svc->af;
1da177e4 783 dest->protocol = svc->protocol;
c860c6b1 784 dest->vaddr = svc->addr;
1da177e4
LT
785 dest->vport = svc->port;
786 dest->vfwmark = svc->fwmark;
c860c6b1 787 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
1da177e4
LT
788 dest->port = udest->port;
789
790 atomic_set(&dest->activeconns, 0);
791 atomic_set(&dest->inactconns, 0);
792 atomic_set(&dest->persistconns, 0);
793 atomic_set(&dest->refcnt, 0);
794
795 INIT_LIST_HEAD(&dest->d_list);
796 spin_lock_init(&dest->dst_lock);
797 spin_lock_init(&dest->stats.lock);
798 __ip_vs_update_dest(svc, dest, udest);
799 ip_vs_new_estimator(&dest->stats);
800
801 *dest_p = dest;
802
803 LeaveFunction(2);
804 return 0;
805}
806
807
808/*
809 * Add a destination into an existing service
810 */
811static int
c860c6b1 812ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
813{
814 struct ip_vs_dest *dest;
c860c6b1 815 union nf_inet_addr daddr;
014d730d 816 __be16 dport = udest->port;
1da177e4
LT
817 int ret;
818
819 EnterFunction(2);
820
821 if (udest->weight < 0) {
822 IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
823 return -ERANGE;
824 }
825
826 if (udest->l_threshold > udest->u_threshold) {
827 IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than "
828 "upper threshold\n");
829 return -ERANGE;
830 }
831
c860c6b1
JV
832 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
833
1da177e4
LT
834 /*
835 * Check if the dest already exists in the list
836 */
c860c6b1 837 dest = ip_vs_lookup_dest(svc, daddr.ip, dport);
1da177e4
LT
838 if (dest != NULL) {
839 IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
840 return -EEXIST;
841 }
842
843 /*
844 * Check if the dest already exists in the trash and
845 * is from the same service
846 */
c860c6b1 847 dest = ip_vs_trash_get_dest(svc, daddr.ip, dport);
1da177e4
LT
848 if (dest != NULL) {
849 IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
4b5bdf5c 850 "dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
1da177e4
LT
851 NIPQUAD(daddr), ntohs(dport),
852 atomic_read(&dest->refcnt),
853 dest->vfwmark,
e7ade46a 854 NIPQUAD(dest->vaddr.ip),
1da177e4
LT
855 ntohs(dest->vport));
856 __ip_vs_update_dest(svc, dest, udest);
857
858 /*
859 * Get the destination from the trash
860 */
861 list_del(&dest->n_list);
862
863 ip_vs_new_estimator(&dest->stats);
864
865 write_lock_bh(&__ip_vs_svc_lock);
866
867 /*
868 * Wait until all other svc users go away.
869 */
870 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
871
872 list_add(&dest->n_list, &svc->destinations);
873 svc->num_dests++;
874
875 /* call the update_service function of its scheduler */
82dfb6f3
SW
876 if (svc->scheduler->update_service)
877 svc->scheduler->update_service(svc);
1da177e4
LT
878
879 write_unlock_bh(&__ip_vs_svc_lock);
880 return 0;
881 }
882
883 /*
884 * Allocate and initialize the dest structure
885 */
886 ret = ip_vs_new_dest(svc, udest, &dest);
887 if (ret) {
888 return ret;
889 }
890
891 /*
892 * Add the dest entry into the list
893 */
894 atomic_inc(&dest->refcnt);
895
896 write_lock_bh(&__ip_vs_svc_lock);
897
898 /*
899 * Wait until all other svc users go away.
900 */
901 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
902
903 list_add(&dest->n_list, &svc->destinations);
904 svc->num_dests++;
905
906 /* call the update_service function of its scheduler */
82dfb6f3
SW
907 if (svc->scheduler->update_service)
908 svc->scheduler->update_service(svc);
1da177e4
LT
909
910 write_unlock_bh(&__ip_vs_svc_lock);
911
912 LeaveFunction(2);
913
914 return 0;
915}
916
917
918/*
919 * Edit a destination in the given service
920 */
921static int
c860c6b1 922ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
923{
924 struct ip_vs_dest *dest;
c860c6b1 925 union nf_inet_addr daddr;
014d730d 926 __be16 dport = udest->port;
1da177e4
LT
927
928 EnterFunction(2);
929
930 if (udest->weight < 0) {
931 IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
932 return -ERANGE;
933 }
934
935 if (udest->l_threshold > udest->u_threshold) {
936 IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than "
937 "upper threshold\n");
938 return -ERANGE;
939 }
940
c860c6b1
JV
941 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
942
1da177e4
LT
943 /*
944 * Lookup the destination list
945 */
c860c6b1 946 dest = ip_vs_lookup_dest(svc, daddr.ip, dport);
1da177e4
LT
947 if (dest == NULL) {
948 IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
949 return -ENOENT;
950 }
951
952 __ip_vs_update_dest(svc, dest, udest);
953
954 write_lock_bh(&__ip_vs_svc_lock);
955
956 /* Wait until all other svc users go away */
cae7ca3d 957 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1da177e4
LT
958
959 /* call the update_service, because server weight may be changed */
82dfb6f3
SW
960 if (svc->scheduler->update_service)
961 svc->scheduler->update_service(svc);
1da177e4
LT
962
963 write_unlock_bh(&__ip_vs_svc_lock);
964
965 LeaveFunction(2);
966
967 return 0;
968}
969
970
971/*
972 * Delete a destination (must be already unlinked from the service)
973 */
974static void __ip_vs_del_dest(struct ip_vs_dest *dest)
975{
976 ip_vs_kill_estimator(&dest->stats);
977
978 /*
979 * Remove it from the d-linked list with the real services.
980 */
981 write_lock_bh(&__ip_vs_rs_lock);
982 ip_vs_rs_unhash(dest);
983 write_unlock_bh(&__ip_vs_rs_lock);
984
985 /*
986 * Decrease the refcnt of the dest, and free the dest
987 * if nobody refers to it (refcnt=0). Otherwise, throw
988 * the destination into the trash.
989 */
990 if (atomic_dec_and_test(&dest->refcnt)) {
991 ip_vs_dst_reset(dest);
992 /* simply decrease svc->refcnt here, let the caller check
993 and release the service if nobody refers to it.
994 Only user context can release destination and service,
995 and only one user context can update virtual service at a
996 time, so the operation here is OK */
997 atomic_dec(&dest->svc->refcnt);
998 kfree(dest);
999 } else {
4b5bdf5c
RN
1000 IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, "
1001 "dest->refcnt=%d\n",
e7ade46a 1002 NIPQUAD(dest->addr.ip), ntohs(dest->port),
1da177e4
LT
1003 atomic_read(&dest->refcnt));
1004 list_add(&dest->n_list, &ip_vs_dest_trash);
1005 atomic_inc(&dest->refcnt);
1006 }
1007}
1008
1009
1010/*
1011 * Unlink a destination from the given service
1012 */
1013static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1014 struct ip_vs_dest *dest,
1015 int svcupd)
1016{
1017 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1018
1019 /*
1020 * Remove it from the d-linked destination list.
1021 */
1022 list_del(&dest->n_list);
1023 svc->num_dests--;
82dfb6f3
SW
1024
1025 /*
1026 * Call the update_service function of its scheduler
1027 */
1028 if (svcupd && svc->scheduler->update_service)
1029 svc->scheduler->update_service(svc);
1da177e4
LT
1030}
1031
1032
1033/*
1034 * Delete a destination server in the given service
1035 */
1036static int
c860c6b1 1037ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
1038{
1039 struct ip_vs_dest *dest;
014d730d 1040 __be16 dport = udest->port;
1da177e4
LT
1041
1042 EnterFunction(2);
1043
c860c6b1
JV
1044 dest = ip_vs_lookup_dest(svc, udest->addr.ip, dport);
1045
1da177e4
LT
1046 if (dest == NULL) {
1047 IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
1048 return -ENOENT;
1049 }
1050
1051 write_lock_bh(&__ip_vs_svc_lock);
1052
1053 /*
1054 * Wait until all other svc users go away.
1055 */
1056 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1057
1058 /*
1059 * Unlink dest from the service
1060 */
1061 __ip_vs_unlink_dest(svc, dest, 1);
1062
1063 write_unlock_bh(&__ip_vs_svc_lock);
1064
1065 /*
1066 * Delete the destination
1067 */
1068 __ip_vs_del_dest(dest);
1069
1070 LeaveFunction(2);
1071
1072 return 0;
1073}
1074
1075
1076/*
1077 * Add a service into the service hash table
1078 */
1079static int
c860c6b1
JV
1080ip_vs_add_service(struct ip_vs_service_user_kern *u,
1081 struct ip_vs_service **svc_p)
1da177e4
LT
1082{
1083 int ret = 0;
1084 struct ip_vs_scheduler *sched = NULL;
1085 struct ip_vs_service *svc = NULL;
1086
1087 /* increase the module use count */
1088 ip_vs_use_count_inc();
1089
1090 /* Lookup the scheduler by 'u->sched_name' */
1091 sched = ip_vs_scheduler_get(u->sched_name);
1092 if (sched == NULL) {
1093 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1094 u->sched_name);
1095 ret = -ENOENT;
1096 goto out_mod_dec;
1097 }
1098
0da974f4 1099 svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
1da177e4
LT
1100 if (svc == NULL) {
1101 IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
1102 ret = -ENOMEM;
1103 goto out_err;
1104 }
1da177e4
LT
1105
1106 /* I'm the first user of the service */
1107 atomic_set(&svc->usecnt, 1);
1108 atomic_set(&svc->refcnt, 0);
1109
c860c6b1 1110 svc->af = u->af;
1da177e4 1111 svc->protocol = u->protocol;
c860c6b1 1112 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1da177e4
LT
1113 svc->port = u->port;
1114 svc->fwmark = u->fwmark;
1115 svc->flags = u->flags;
1116 svc->timeout = u->timeout * HZ;
1117 svc->netmask = u->netmask;
1118
1119 INIT_LIST_HEAD(&svc->destinations);
1120 rwlock_init(&svc->sched_lock);
1121 spin_lock_init(&svc->stats.lock);
1122
1123 /* Bind the scheduler */
1124 ret = ip_vs_bind_scheduler(svc, sched);
1125 if (ret)
1126 goto out_err;
1127 sched = NULL;
1128
1129 /* Update the virtual service counters */
1130 if (svc->port == FTPPORT)
1131 atomic_inc(&ip_vs_ftpsvc_counter);
1132 else if (svc->port == 0)
1133 atomic_inc(&ip_vs_nullsvc_counter);
1134
1135 ip_vs_new_estimator(&svc->stats);
1136 ip_vs_num_services++;
1137
1138 /* Hash the service into the service table */
1139 write_lock_bh(&__ip_vs_svc_lock);
1140 ip_vs_svc_hash(svc);
1141 write_unlock_bh(&__ip_vs_svc_lock);
1142
1143 *svc_p = svc;
1144 return 0;
1145
1146 out_err:
1147 if (svc != NULL) {
1148 if (svc->scheduler)
1149 ip_vs_unbind_scheduler(svc);
1150 if (svc->inc) {
1151 local_bh_disable();
1152 ip_vs_app_inc_put(svc->inc);
1153 local_bh_enable();
1154 }
1155 kfree(svc);
1156 }
1157 ip_vs_scheduler_put(sched);
1158
1159 out_mod_dec:
1160 /* decrease the module use count */
1161 ip_vs_use_count_dec();
1162
1163 return ret;
1164}
1165
1166
1167/*
1168 * Edit a service and bind it with a new scheduler
1169 */
1170static int
c860c6b1 1171ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1da177e4
LT
1172{
1173 struct ip_vs_scheduler *sched, *old_sched;
1174 int ret = 0;
1175
1176 /*
1177 * Lookup the scheduler, by 'u->sched_name'
1178 */
1179 sched = ip_vs_scheduler_get(u->sched_name);
1180 if (sched == NULL) {
1181 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1182 u->sched_name);
1183 return -ENOENT;
1184 }
1185 old_sched = sched;
1186
1187 write_lock_bh(&__ip_vs_svc_lock);
1188
1189 /*
1190 * Wait until all other svc users go away.
1191 */
1192 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1193
1194 /*
1195 * Set the flags and timeout value
1196 */
1197 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1198 svc->timeout = u->timeout * HZ;
1199 svc->netmask = u->netmask;
1200
1201 old_sched = svc->scheduler;
1202 if (sched != old_sched) {
1203 /*
1204 * Unbind the old scheduler
1205 */
1206 if ((ret = ip_vs_unbind_scheduler(svc))) {
1207 old_sched = sched;
1208 goto out;
1209 }
1210
1211 /*
1212 * Bind the new scheduler
1213 */
1214 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1215 /*
1216 * If ip_vs_bind_scheduler fails, restore the old
1217 * scheduler.
1218 * The main reason of failure is out of memory.
1219 *
1220 * The question is if the old scheduler can be
1221 * restored all the time. TODO: if it cannot be
1222 * restored some time, we must delete the service,
1223 * otherwise the system may crash.
1224 */
1225 ip_vs_bind_scheduler(svc, old_sched);
1226 old_sched = sched;
1227 goto out;
1228 }
1229 }
1230
1231 out:
1232 write_unlock_bh(&__ip_vs_svc_lock);
1233
1234 if (old_sched)
1235 ip_vs_scheduler_put(old_sched);
1236
1237 return ret;
1238}
1239
1240
1241/*
1242 * Delete a service from the service list
1243 * - The service must be unlinked, unlocked and not referenced!
1244 * - We are called under _bh lock
1245 */
1246static void __ip_vs_del_service(struct ip_vs_service *svc)
1247{
1248 struct ip_vs_dest *dest, *nxt;
1249 struct ip_vs_scheduler *old_sched;
1250
1251 ip_vs_num_services--;
1252 ip_vs_kill_estimator(&svc->stats);
1253
1254 /* Unbind scheduler */
1255 old_sched = svc->scheduler;
1256 ip_vs_unbind_scheduler(svc);
1257 if (old_sched)
1258 ip_vs_scheduler_put(old_sched);
1259
1260 /* Unbind app inc */
1261 if (svc->inc) {
1262 ip_vs_app_inc_put(svc->inc);
1263 svc->inc = NULL;
1264 }
1265
1266 /*
1267 * Unlink the whole destination list
1268 */
1269 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1270 __ip_vs_unlink_dest(svc, dest, 0);
1271 __ip_vs_del_dest(dest);
1272 }
1273
1274 /*
1275 * Update the virtual service counters
1276 */
1277 if (svc->port == FTPPORT)
1278 atomic_dec(&ip_vs_ftpsvc_counter);
1279 else if (svc->port == 0)
1280 atomic_dec(&ip_vs_nullsvc_counter);
1281
1282 /*
1283 * Free the service if nobody refers to it
1284 */
1285 if (atomic_read(&svc->refcnt) == 0)
1286 kfree(svc);
1287
1288 /* decrease the module use count */
1289 ip_vs_use_count_dec();
1290}
1291
1292/*
1293 * Delete a service from the service list
1294 */
1295static int ip_vs_del_service(struct ip_vs_service *svc)
1296{
1297 if (svc == NULL)
1298 return -EEXIST;
1299
1300 /*
1301 * Unhash it from the service table
1302 */
1303 write_lock_bh(&__ip_vs_svc_lock);
1304
1305 ip_vs_svc_unhash(svc);
1306
1307 /*
1308 * Wait until all the svc users go away.
1309 */
1310 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1311
1312 __ip_vs_del_service(svc);
1313
1314 write_unlock_bh(&__ip_vs_svc_lock);
1315
1316 return 0;
1317}
1318
1319
1320/*
1321 * Flush all the virtual services
1322 */
1323static int ip_vs_flush(void)
1324{
1325 int idx;
1326 struct ip_vs_service *svc, *nxt;
1327
1328 /*
1329 * Flush the service table hashed by <protocol,addr,port>
1330 */
1331 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1332 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1333 write_lock_bh(&__ip_vs_svc_lock);
1334 ip_vs_svc_unhash(svc);
1335 /*
1336 * Wait until all the svc users go away.
1337 */
1338 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1339 __ip_vs_del_service(svc);
1340 write_unlock_bh(&__ip_vs_svc_lock);
1341 }
1342 }
1343
1344 /*
1345 * Flush the service table hashed by fwmark
1346 */
1347 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1348 list_for_each_entry_safe(svc, nxt,
1349 &ip_vs_svc_fwm_table[idx], f_list) {
1350 write_lock_bh(&__ip_vs_svc_lock);
1351 ip_vs_svc_unhash(svc);
1352 /*
1353 * Wait until all the svc users go away.
1354 */
1355 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1356 __ip_vs_del_service(svc);
1357 write_unlock_bh(&__ip_vs_svc_lock);
1358 }
1359 }
1360
1361 return 0;
1362}
1363
1364
1365/*
1366 * Zero counters in a service or all services
1367 */
1368static int ip_vs_zero_service(struct ip_vs_service *svc)
1369{
1370 struct ip_vs_dest *dest;
1371
1372 write_lock_bh(&__ip_vs_svc_lock);
1373 list_for_each_entry(dest, &svc->destinations, n_list) {
1374 ip_vs_zero_stats(&dest->stats);
1375 }
1376 ip_vs_zero_stats(&svc->stats);
1377 write_unlock_bh(&__ip_vs_svc_lock);
1378 return 0;
1379}
1380
1381static int ip_vs_zero_all(void)
1382{
1383 int idx;
1384 struct ip_vs_service *svc;
1385
1386 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1387 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1388 ip_vs_zero_service(svc);
1389 }
1390 }
1391
1392 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1393 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1394 ip_vs_zero_service(svc);
1395 }
1396 }
1397
1398 ip_vs_zero_stats(&ip_vs_stats);
1399 return 0;
1400}
1401
1402
1403static int
1404proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
1405 void __user *buffer, size_t *lenp, loff_t *ppos)
1406{
1407 int *valp = table->data;
1408 int val = *valp;
1409 int rc;
1410
1411 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1412 if (write && (*valp != val)) {
1413 if ((*valp < 0) || (*valp > 3)) {
1414 /* Restore the correct value */
1415 *valp = val;
1416 } else {
1da177e4 1417 update_defense_level();
1da177e4
LT
1418 }
1419 }
1420 return rc;
1421}
1422
1423
1424static int
1425proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
1426 void __user *buffer, size_t *lenp, loff_t *ppos)
1427{
1428 int *valp = table->data;
1429 int val[2];
1430 int rc;
1431
1432 /* backup the value first */
1433 memcpy(val, valp, sizeof(val));
1434
1435 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1436 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1437 /* Restore the correct value */
1438 memcpy(valp, val, sizeof(val));
1439 }
1440 return rc;
1441}
1442
1443
1444/*
1445 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1446 */
1447
1448static struct ctl_table vs_vars[] = {
1449 {
1da177e4
LT
1450 .procname = "amemthresh",
1451 .data = &sysctl_ip_vs_amemthresh,
1452 .maxlen = sizeof(int),
1453 .mode = 0644,
1454 .proc_handler = &proc_dointvec,
1455 },
1456#ifdef CONFIG_IP_VS_DEBUG
1457 {
1da177e4
LT
1458 .procname = "debug_level",
1459 .data = &sysctl_ip_vs_debug_level,
1460 .maxlen = sizeof(int),
1461 .mode = 0644,
1462 .proc_handler = &proc_dointvec,
1463 },
1464#endif
1465 {
1da177e4
LT
1466 .procname = "am_droprate",
1467 .data = &sysctl_ip_vs_am_droprate,
1468 .maxlen = sizeof(int),
1469 .mode = 0644,
1470 .proc_handler = &proc_dointvec,
1471 },
1472 {
1da177e4
LT
1473 .procname = "drop_entry",
1474 .data = &sysctl_ip_vs_drop_entry,
1475 .maxlen = sizeof(int),
1476 .mode = 0644,
1477 .proc_handler = &proc_do_defense_mode,
1478 },
1479 {
1da177e4
LT
1480 .procname = "drop_packet",
1481 .data = &sysctl_ip_vs_drop_packet,
1482 .maxlen = sizeof(int),
1483 .mode = 0644,
1484 .proc_handler = &proc_do_defense_mode,
1485 },
1486 {
1da177e4
LT
1487 .procname = "secure_tcp",
1488 .data = &sysctl_ip_vs_secure_tcp,
1489 .maxlen = sizeof(int),
1490 .mode = 0644,
1491 .proc_handler = &proc_do_defense_mode,
1492 },
1493#if 0
1494 {
1da177e4
LT
1495 .procname = "timeout_established",
1496 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1497 .maxlen = sizeof(int),
1498 .mode = 0644,
1499 .proc_handler = &proc_dointvec_jiffies,
1500 },
1501 {
1da177e4
LT
1502 .procname = "timeout_synsent",
1503 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1504 .maxlen = sizeof(int),
1505 .mode = 0644,
1506 .proc_handler = &proc_dointvec_jiffies,
1507 },
1508 {
1da177e4
LT
1509 .procname = "timeout_synrecv",
1510 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1511 .maxlen = sizeof(int),
1512 .mode = 0644,
1513 .proc_handler = &proc_dointvec_jiffies,
1514 },
1515 {
1da177e4
LT
1516 .procname = "timeout_finwait",
1517 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1518 .maxlen = sizeof(int),
1519 .mode = 0644,
1520 .proc_handler = &proc_dointvec_jiffies,
1521 },
1522 {
1da177e4
LT
1523 .procname = "timeout_timewait",
1524 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1525 .maxlen = sizeof(int),
1526 .mode = 0644,
1527 .proc_handler = &proc_dointvec_jiffies,
1528 },
1529 {
1da177e4
LT
1530 .procname = "timeout_close",
1531 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1532 .maxlen = sizeof(int),
1533 .mode = 0644,
1534 .proc_handler = &proc_dointvec_jiffies,
1535 },
1536 {
1da177e4
LT
1537 .procname = "timeout_closewait",
1538 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1539 .maxlen = sizeof(int),
1540 .mode = 0644,
1541 .proc_handler = &proc_dointvec_jiffies,
1542 },
1543 {
1da177e4
LT
1544 .procname = "timeout_lastack",
1545 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1546 .maxlen = sizeof(int),
1547 .mode = 0644,
1548 .proc_handler = &proc_dointvec_jiffies,
1549 },
1550 {
1da177e4
LT
1551 .procname = "timeout_listen",
1552 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1553 .maxlen = sizeof(int),
1554 .mode = 0644,
1555 .proc_handler = &proc_dointvec_jiffies,
1556 },
1557 {
1da177e4
LT
1558 .procname = "timeout_synack",
1559 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1560 .maxlen = sizeof(int),
1561 .mode = 0644,
1562 .proc_handler = &proc_dointvec_jiffies,
1563 },
1564 {
1da177e4
LT
1565 .procname = "timeout_udp",
1566 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1567 .maxlen = sizeof(int),
1568 .mode = 0644,
1569 .proc_handler = &proc_dointvec_jiffies,
1570 },
1571 {
1da177e4
LT
1572 .procname = "timeout_icmp",
1573 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1574 .maxlen = sizeof(int),
1575 .mode = 0644,
1576 .proc_handler = &proc_dointvec_jiffies,
1577 },
1578#endif
1579 {
1da177e4
LT
1580 .procname = "cache_bypass",
1581 .data = &sysctl_ip_vs_cache_bypass,
1582 .maxlen = sizeof(int),
1583 .mode = 0644,
1584 .proc_handler = &proc_dointvec,
1585 },
1586 {
1da177e4
LT
1587 .procname = "expire_nodest_conn",
1588 .data = &sysctl_ip_vs_expire_nodest_conn,
1589 .maxlen = sizeof(int),
1590 .mode = 0644,
1591 .proc_handler = &proc_dointvec,
1592 },
1593 {
1da177e4
LT
1594 .procname = "expire_quiescent_template",
1595 .data = &sysctl_ip_vs_expire_quiescent_template,
1596 .maxlen = sizeof(int),
1597 .mode = 0644,
1598 .proc_handler = &proc_dointvec,
1599 },
1600 {
1da177e4
LT
1601 .procname = "sync_threshold",
1602 .data = &sysctl_ip_vs_sync_threshold,
1603 .maxlen = sizeof(sysctl_ip_vs_sync_threshold),
1604 .mode = 0644,
1605 .proc_handler = &proc_do_sync_threshold,
1606 },
1607 {
1da177e4
LT
1608 .procname = "nat_icmp_send",
1609 .data = &sysctl_ip_vs_nat_icmp_send,
1610 .maxlen = sizeof(int),
1611 .mode = 0644,
1612 .proc_handler = &proc_dointvec,
1613 },
1614 { .ctl_name = 0 }
1615};
1616
5587da55 1617const struct ctl_path net_vs_ctl_path[] = {
90754f8e
PE
1618 { .procname = "net", .ctl_name = CTL_NET, },
1619 { .procname = "ipv4", .ctl_name = NET_IPV4, },
1620 { .procname = "vs", },
1621 { }
1da177e4 1622};
90754f8e 1623EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1da177e4
LT
1624
1625static struct ctl_table_header * sysctl_header;
1626
1627#ifdef CONFIG_PROC_FS
1628
/* Cursor kept in seq->private while walking the service tables */
struct ip_vs_iter {
	struct list_head *table;	/* ip_vs_svc_table or ip_vs_svc_fwm_table */
	int bucket;			/* index of the bucket being walked */
};
1633
1634/*
1635 * Write the contents of the VS rule table to a PROCfs file.
1636 * (It is kept just for backward compatibility)
1637 */
1638static inline const char *ip_vs_fwd_name(unsigned flags)
1639{
1640 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1641 case IP_VS_CONN_F_LOCALNODE:
1642 return "Local";
1643 case IP_VS_CONN_F_TUNNEL:
1644 return "Tunnel";
1645 case IP_VS_CONN_F_DROUTE:
1646 return "Route";
1647 default:
1648 return "Masq";
1649 }
1650}
1651
1652
1653/* Get the Nth entry in the two lists */
1654static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1655{
1656 struct ip_vs_iter *iter = seq->private;
1657 int idx;
1658 struct ip_vs_service *svc;
1659
1660 /* look in hash by protocol */
1661 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1662 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1663 if (pos-- == 0){
1664 iter->table = ip_vs_svc_table;
1665 iter->bucket = idx;
1666 return svc;
1667 }
1668 }
1669 }
1670
1671 /* keep looking in fwmark */
1672 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1673 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1674 if (pos-- == 0) {
1675 iter->table = ip_vs_svc_fwm_table;
1676 iter->bucket = idx;
1677 return svc;
1678 }
1679 }
1680 }
1681
1682 return NULL;
1683}
1684
1685static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1686{
1687
1688 read_lock_bh(&__ip_vs_svc_lock);
1689 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1690}
1691
1692
1693static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1694{
1695 struct list_head *e;
1696 struct ip_vs_iter *iter;
1697 struct ip_vs_service *svc;
1698
1699 ++*pos;
1700 if (v == SEQ_START_TOKEN)
1701 return ip_vs_info_array(seq,0);
1702
1703 svc = v;
1704 iter = seq->private;
1705
1706 if (iter->table == ip_vs_svc_table) {
1707 /* next service in table hashed by protocol */
1708 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1709 return list_entry(e, struct ip_vs_service, s_list);
1710
1711
1712 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1713 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1714 s_list) {
1715 return svc;
1716 }
1717 }
1718
1719 iter->table = ip_vs_svc_fwm_table;
1720 iter->bucket = -1;
1721 goto scan_fwmark;
1722 }
1723
1724 /* next service in hashed by fwmark */
1725 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1726 return list_entry(e, struct ip_vs_service, f_list);
1727
1728 scan_fwmark:
1729 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1730 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1731 f_list)
1732 return svc;
1733 }
1734
1735 return NULL;
1736}
1737
1738static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1739{
1740 read_unlock_bh(&__ip_vs_svc_lock);
1741}
1742
1743
1744static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1745{
1746 if (v == SEQ_START_TOKEN) {
1747 seq_printf(seq,
1748 "IP Virtual Server version %d.%d.%d (size=%d)\n",
1749 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
1750 seq_puts(seq,
1751 "Prot LocalAddress:Port Scheduler Flags\n");
1752 seq_puts(seq,
1753 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1754 } else {
1755 const struct ip_vs_service *svc = v;
1756 const struct ip_vs_iter *iter = seq->private;
1757 const struct ip_vs_dest *dest;
1758
1759 if (iter->table == ip_vs_svc_table)
1760 seq_printf(seq, "%s %08X:%04X %s ",
1761 ip_vs_proto_name(svc->protocol),
e7ade46a 1762 ntohl(svc->addr.ip),
1da177e4
LT
1763 ntohs(svc->port),
1764 svc->scheduler->name);
1765 else
1766 seq_printf(seq, "FWM %08X %s ",
1767 svc->fwmark, svc->scheduler->name);
1768
1769 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1770 seq_printf(seq, "persistent %d %08X\n",
1771 svc->timeout,
1772 ntohl(svc->netmask));
1773 else
1774 seq_putc(seq, '\n');
1775
1776 list_for_each_entry(dest, &svc->destinations, n_list) {
1777 seq_printf(seq,
1778 " -> %08X:%04X %-7s %-6d %-10d %-10d\n",
e7ade46a 1779 ntohl(dest->addr.ip), ntohs(dest->port),
1da177e4
LT
1780 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1781 atomic_read(&dest->weight),
1782 atomic_read(&dest->activeconns),
1783 atomic_read(&dest->inactconns));
1784 }
1785 }
1786 return 0;
1787}
1788
56b3d975 1789static const struct seq_operations ip_vs_info_seq_ops = {
1da177e4
LT
1790 .start = ip_vs_info_seq_start,
1791 .next = ip_vs_info_seq_next,
1792 .stop = ip_vs_info_seq_stop,
1793 .show = ip_vs_info_seq_show,
1794};
1795
1796static int ip_vs_info_open(struct inode *inode, struct file *file)
1797{
cf7732e4
PE
1798 return seq_open_private(file, &ip_vs_info_seq_ops,
1799 sizeof(struct ip_vs_iter));
1da177e4
LT
1800}
1801
9a32144e 1802static const struct file_operations ip_vs_info_fops = {
1da177e4
LT
1803 .owner = THIS_MODULE,
1804 .open = ip_vs_info_open,
1805 .read = seq_read,
1806 .llseek = seq_lseek,
1807 .release = seq_release_private,
1808};
1809
1810#endif
1811
519e49e8
SW
1812struct ip_vs_stats ip_vs_stats = {
1813 .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
1814};
1da177e4
LT
1815
1816#ifdef CONFIG_PROC_FS
1817static int ip_vs_stats_show(struct seq_file *seq, void *v)
1818{
1819
1820/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1821 seq_puts(seq,
1822 " Total Incoming Outgoing Incoming Outgoing\n");
1823 seq_printf(seq,
1824 " Conns Packets Packets Bytes Bytes\n");
1825
1826 spin_lock_bh(&ip_vs_stats.lock);
1827 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.conns,
1828 ip_vs_stats.inpkts, ip_vs_stats.outpkts,
1829 (unsigned long long) ip_vs_stats.inbytes,
1830 (unsigned long long) ip_vs_stats.outbytes);
1831
1832/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1833 seq_puts(seq,
1834 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1835 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
1836 ip_vs_stats.cps,
1837 ip_vs_stats.inpps,
1838 ip_vs_stats.outpps,
1839 ip_vs_stats.inbps,
1840 ip_vs_stats.outbps);
1841 spin_unlock_bh(&ip_vs_stats.lock);
1842
1843 return 0;
1844}
1845
1846static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1847{
1848 return single_open(file, ip_vs_stats_show, NULL);
1849}
1850
9a32144e 1851static const struct file_operations ip_vs_stats_fops = {
1da177e4
LT
1852 .owner = THIS_MODULE,
1853 .open = ip_vs_stats_seq_open,
1854 .read = seq_read,
1855 .llseek = seq_lseek,
1856 .release = single_release,
1857};
1858
1859#endif
1860
1861/*
1862 * Set timeout values for tcp tcpfin udp in the timeout_table.
1863 */
1864static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
1865{
1866 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
1867 u->tcp_timeout,
1868 u->tcp_fin_timeout,
1869 u->udp_timeout);
1870
1871#ifdef CONFIG_IP_VS_PROTO_TCP
1872 if (u->tcp_timeout) {
1873 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
1874 = u->tcp_timeout * HZ;
1875 }
1876
1877 if (u->tcp_fin_timeout) {
1878 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
1879 = u->tcp_fin_timeout * HZ;
1880 }
1881#endif
1882
1883#ifdef CONFIG_IP_VS_PROTO_UDP
1884 if (u->udp_timeout) {
1885 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
1886 = u->udp_timeout * HZ;
1887 }
1888#endif
1889 return 0;
1890}
1891
1892
1893#define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
1894#define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user))
1895#define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \
1896 sizeof(struct ip_vs_dest_user))
1897#define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
1898#define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user))
1899#define MAX_ARG_LEN SVCDEST_ARG_LEN
1900
9b5b5cff 1901static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
1da177e4
LT
1902 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
1903 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
1904 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
1905 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
1906 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
1907 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
1908 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
1909 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
1910 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
1911 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
1912 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
1913};
1914
c860c6b1
JV
1915static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
1916 struct ip_vs_service_user *usvc_compat)
1917{
1918 usvc->af = AF_INET;
1919 usvc->protocol = usvc_compat->protocol;
1920 usvc->addr.ip = usvc_compat->addr;
1921 usvc->port = usvc_compat->port;
1922 usvc->fwmark = usvc_compat->fwmark;
1923
1924 /* Deep copy of sched_name is not needed here */
1925 usvc->sched_name = usvc_compat->sched_name;
1926
1927 usvc->flags = usvc_compat->flags;
1928 usvc->timeout = usvc_compat->timeout;
1929 usvc->netmask = usvc_compat->netmask;
1930}
1931
1932static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
1933 struct ip_vs_dest_user *udest_compat)
1934{
1935 udest->addr.ip = udest_compat->addr;
1936 udest->port = udest_compat->port;
1937 udest->conn_flags = udest_compat->conn_flags;
1938 udest->weight = udest_compat->weight;
1939 udest->u_threshold = udest_compat->u_threshold;
1940 udest->l_threshold = udest_compat->l_threshold;
1941}
1942
1da177e4
LT
1943static int
1944do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1945{
1946 int ret;
1947 unsigned char arg[MAX_ARG_LEN];
c860c6b1
JV
1948 struct ip_vs_service_user *usvc_compat;
1949 struct ip_vs_service_user_kern usvc;
1da177e4 1950 struct ip_vs_service *svc;
c860c6b1
JV
1951 struct ip_vs_dest_user *udest_compat;
1952 struct ip_vs_dest_user_kern udest;
1da177e4
LT
1953
1954 if (!capable(CAP_NET_ADMIN))
1955 return -EPERM;
1956
1957 if (len != set_arglen[SET_CMDID(cmd)]) {
1958 IP_VS_ERR("set_ctl: len %u != %u\n",
1959 len, set_arglen[SET_CMDID(cmd)]);
1960 return -EINVAL;
1961 }
1962
1963 if (copy_from_user(arg, user, len) != 0)
1964 return -EFAULT;
1965
1966 /* increase the module use count */
1967 ip_vs_use_count_inc();
1968
14cc3e2b 1969 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
1da177e4
LT
1970 ret = -ERESTARTSYS;
1971 goto out_dec;
1972 }
1973
1974 if (cmd == IP_VS_SO_SET_FLUSH) {
1975 /* Flush the virtual service */
1976 ret = ip_vs_flush();
1977 goto out_unlock;
1978 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
1979 /* Set timeout values for (tcp tcpfin udp) */
1980 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
1981 goto out_unlock;
1982 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
1983 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
1984 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
1985 goto out_unlock;
1986 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
1987 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
1988 ret = stop_sync_thread(dm->state);
1989 goto out_unlock;
1990 }
1991
c860c6b1
JV
1992 usvc_compat = (struct ip_vs_service_user *)arg;
1993 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
1994
1995 /* We only use the new structs internally, so copy userspace compat
1996 * structs to extended internal versions */
1997 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
1998 ip_vs_copy_udest_compat(&udest, udest_compat);
1da177e4
LT
1999
2000 if (cmd == IP_VS_SO_SET_ZERO) {
2001 /* if no service address is set, zero counters in all */
c860c6b1 2002 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
1da177e4
LT
2003 ret = ip_vs_zero_all();
2004 goto out_unlock;
2005 }
2006 }
2007
2008 /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
c860c6b1 2009 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) {
1da177e4 2010 IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
c860c6b1
JV
2011 usvc.protocol, NIPQUAD(usvc.addr.ip),
2012 ntohs(usvc.port), usvc.sched_name);
1da177e4
LT
2013 ret = -EFAULT;
2014 goto out_unlock;
2015 }
2016
2017 /* Lookup the exact service by <protocol, addr, port> or fwmark */
c860c6b1
JV
2018 if (usvc.fwmark == 0)
2019 svc = __ip_vs_service_get(usvc.protocol,
2020 usvc.addr.ip, usvc.port);
1da177e4 2021 else
c860c6b1 2022 svc = __ip_vs_svc_fwm_get(usvc.fwmark);
1da177e4
LT
2023
2024 if (cmd != IP_VS_SO_SET_ADD
c860c6b1 2025 && (svc == NULL || svc->protocol != usvc.protocol)) {
1da177e4
LT
2026 ret = -ESRCH;
2027 goto out_unlock;
2028 }
2029
2030 switch (cmd) {
2031 case IP_VS_SO_SET_ADD:
2032 if (svc != NULL)
2033 ret = -EEXIST;
2034 else
c860c6b1 2035 ret = ip_vs_add_service(&usvc, &svc);
1da177e4
LT
2036 break;
2037 case IP_VS_SO_SET_EDIT:
c860c6b1 2038 ret = ip_vs_edit_service(svc, &usvc);
1da177e4
LT
2039 break;
2040 case IP_VS_SO_SET_DEL:
2041 ret = ip_vs_del_service(svc);
2042 if (!ret)
2043 goto out_unlock;
2044 break;
2045 case IP_VS_SO_SET_ZERO:
2046 ret = ip_vs_zero_service(svc);
2047 break;
2048 case IP_VS_SO_SET_ADDDEST:
c860c6b1 2049 ret = ip_vs_add_dest(svc, &udest);
1da177e4
LT
2050 break;
2051 case IP_VS_SO_SET_EDITDEST:
c860c6b1 2052 ret = ip_vs_edit_dest(svc, &udest);
1da177e4
LT
2053 break;
2054 case IP_VS_SO_SET_DELDEST:
c860c6b1 2055 ret = ip_vs_del_dest(svc, &udest);
1da177e4
LT
2056 break;
2057 default:
2058 ret = -EINVAL;
2059 }
2060
2061 if (svc)
2062 ip_vs_service_put(svc);
2063
2064 out_unlock:
14cc3e2b 2065 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2066 out_dec:
2067 /* decrease the module use count */
2068 ip_vs_use_count_dec();
2069
2070 return ret;
2071}
2072
2073
2074static void
2075ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2076{
2077 spin_lock_bh(&src->lock);
2078 memcpy(dst, src, (char*)&src->lock - (char*)src);
2079 spin_unlock_bh(&src->lock);
2080}
2081
2082static void
2083ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2084{
2085 dst->protocol = src->protocol;
e7ade46a 2086 dst->addr = src->addr.ip;
1da177e4
LT
2087 dst->port = src->port;
2088 dst->fwmark = src->fwmark;
4da62fc7 2089 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
1da177e4
LT
2090 dst->flags = src->flags;
2091 dst->timeout = src->timeout / HZ;
2092 dst->netmask = src->netmask;
2093 dst->num_dests = src->num_dests;
2094 ip_vs_copy_stats(&dst->stats, &src->stats);
2095}
2096
2097static inline int
2098__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2099 struct ip_vs_get_services __user *uptr)
2100{
2101 int idx, count=0;
2102 struct ip_vs_service *svc;
2103 struct ip_vs_service_entry entry;
2104 int ret = 0;
2105
2106 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2107 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2108 if (count >= get->num_services)
2109 goto out;
4da62fc7 2110 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2111 ip_vs_copy_service(&entry, svc);
2112 if (copy_to_user(&uptr->entrytable[count],
2113 &entry, sizeof(entry))) {
2114 ret = -EFAULT;
2115 goto out;
2116 }
2117 count++;
2118 }
2119 }
2120
2121 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2122 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2123 if (count >= get->num_services)
2124 goto out;
4da62fc7 2125 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2126 ip_vs_copy_service(&entry, svc);
2127 if (copy_to_user(&uptr->entrytable[count],
2128 &entry, sizeof(entry))) {
2129 ret = -EFAULT;
2130 goto out;
2131 }
2132 count++;
2133 }
2134 }
2135 out:
2136 return ret;
2137}
2138
2139static inline int
2140__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2141 struct ip_vs_get_dests __user *uptr)
2142{
2143 struct ip_vs_service *svc;
2144 int ret = 0;
2145
2146 if (get->fwmark)
2147 svc = __ip_vs_svc_fwm_get(get->fwmark);
2148 else
2149 svc = __ip_vs_service_get(get->protocol,
2150 get->addr, get->port);
2151 if (svc) {
2152 int count = 0;
2153 struct ip_vs_dest *dest;
2154 struct ip_vs_dest_entry entry;
2155
2156 list_for_each_entry(dest, &svc->destinations, n_list) {
2157 if (count >= get->num_dests)
2158 break;
2159
e7ade46a 2160 entry.addr = dest->addr.ip;
1da177e4
LT
2161 entry.port = dest->port;
2162 entry.conn_flags = atomic_read(&dest->conn_flags);
2163 entry.weight = atomic_read(&dest->weight);
2164 entry.u_threshold = dest->u_threshold;
2165 entry.l_threshold = dest->l_threshold;
2166 entry.activeconns = atomic_read(&dest->activeconns);
2167 entry.inactconns = atomic_read(&dest->inactconns);
2168 entry.persistconns = atomic_read(&dest->persistconns);
2169 ip_vs_copy_stats(&entry.stats, &dest->stats);
2170 if (copy_to_user(&uptr->entrytable[count],
2171 &entry, sizeof(entry))) {
2172 ret = -EFAULT;
2173 break;
2174 }
2175 count++;
2176 }
2177 ip_vs_service_put(svc);
2178 } else
2179 ret = -ESRCH;
2180 return ret;
2181}
2182
2183static inline void
2184__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2185{
2186#ifdef CONFIG_IP_VS_PROTO_TCP
2187 u->tcp_timeout =
2188 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2189 u->tcp_fin_timeout =
2190 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2191#endif
2192#ifdef CONFIG_IP_VS_PROTO_UDP
2193 u->udp_timeout =
2194 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2195#endif
2196}
2197
2198
2199#define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2200#define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2201#define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2202#define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2203#define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2204#define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2205#define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2206
9b5b5cff 2207static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
1da177e4
LT
2208 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2209 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2210 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2211 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2212 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2213 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2214 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2215};
2216
2217static int
2218do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2219{
2220 unsigned char arg[128];
2221 int ret = 0;
2222
2223 if (!capable(CAP_NET_ADMIN))
2224 return -EPERM;
2225
2226 if (*len < get_arglen[GET_CMDID(cmd)]) {
2227 IP_VS_ERR("get_ctl: len %u < %u\n",
2228 *len, get_arglen[GET_CMDID(cmd)]);
2229 return -EINVAL;
2230 }
2231
2232 if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0)
2233 return -EFAULT;
2234
14cc3e2b 2235 if (mutex_lock_interruptible(&__ip_vs_mutex))
1da177e4
LT
2236 return -ERESTARTSYS;
2237
2238 switch (cmd) {
2239 case IP_VS_SO_GET_VERSION:
2240 {
2241 char buf[64];
2242
2243 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2244 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
2245 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2246 ret = -EFAULT;
2247 goto out;
2248 }
2249 *len = strlen(buf)+1;
2250 }
2251 break;
2252
2253 case IP_VS_SO_GET_INFO:
2254 {
2255 struct ip_vs_getinfo info;
2256 info.version = IP_VS_VERSION_CODE;
2257 info.size = IP_VS_CONN_TAB_SIZE;
2258 info.num_services = ip_vs_num_services;
2259 if (copy_to_user(user, &info, sizeof(info)) != 0)
2260 ret = -EFAULT;
2261 }
2262 break;
2263
2264 case IP_VS_SO_GET_SERVICES:
2265 {
2266 struct ip_vs_get_services *get;
2267 int size;
2268
2269 get = (struct ip_vs_get_services *)arg;
2270 size = sizeof(*get) +
2271 sizeof(struct ip_vs_service_entry) * get->num_services;
2272 if (*len != size) {
2273 IP_VS_ERR("length: %u != %u\n", *len, size);
2274 ret = -EINVAL;
2275 goto out;
2276 }
2277 ret = __ip_vs_get_service_entries(get, user);
2278 }
2279 break;
2280
2281 case IP_VS_SO_GET_SERVICE:
2282 {
2283 struct ip_vs_service_entry *entry;
2284 struct ip_vs_service *svc;
2285
2286 entry = (struct ip_vs_service_entry *)arg;
2287 if (entry->fwmark)
2288 svc = __ip_vs_svc_fwm_get(entry->fwmark);
2289 else
2290 svc = __ip_vs_service_get(entry->protocol,
2291 entry->addr, entry->port);
2292 if (svc) {
2293 ip_vs_copy_service(entry, svc);
2294 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2295 ret = -EFAULT;
2296 ip_vs_service_put(svc);
2297 } else
2298 ret = -ESRCH;
2299 }
2300 break;
2301
2302 case IP_VS_SO_GET_DESTS:
2303 {
2304 struct ip_vs_get_dests *get;
2305 int size;
2306
2307 get = (struct ip_vs_get_dests *)arg;
2308 size = sizeof(*get) +
2309 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2310 if (*len != size) {
2311 IP_VS_ERR("length: %u != %u\n", *len, size);
2312 ret = -EINVAL;
2313 goto out;
2314 }
2315 ret = __ip_vs_get_dest_entries(get, user);
2316 }
2317 break;
2318
2319 case IP_VS_SO_GET_TIMEOUT:
2320 {
2321 struct ip_vs_timeout_user t;
2322
2323 __ip_vs_get_timeouts(&t);
2324 if (copy_to_user(user, &t, sizeof(t)) != 0)
2325 ret = -EFAULT;
2326 }
2327 break;
2328
2329 case IP_VS_SO_GET_DAEMON:
2330 {
2331 struct ip_vs_daemon_user d[2];
2332
2333 memset(&d, 0, sizeof(d));
2334 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2335 d[0].state = IP_VS_STATE_MASTER;
4da62fc7 2336 strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
1da177e4
LT
2337 d[0].syncid = ip_vs_master_syncid;
2338 }
2339 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2340 d[1].state = IP_VS_STATE_BACKUP;
4da62fc7 2341 strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
1da177e4
LT
2342 d[1].syncid = ip_vs_backup_syncid;
2343 }
2344 if (copy_to_user(user, &d, sizeof(d)) != 0)
2345 ret = -EFAULT;
2346 }
2347 break;
2348
2349 default:
2350 ret = -EINVAL;
2351 }
2352
2353 out:
14cc3e2b 2354 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2355 return ret;
2356}
2357
2358
2359static struct nf_sockopt_ops ip_vs_sockopts = {
2360 .pf = PF_INET,
2361 .set_optmin = IP_VS_BASE_CTL,
2362 .set_optmax = IP_VS_SO_SET_MAX+1,
2363 .set = do_ip_vs_set_ctl,
2364 .get_optmin = IP_VS_BASE_CTL,
2365 .get_optmax = IP_VS_SO_GET_MAX+1,
2366 .get = do_ip_vs_get_ctl,
16fcec35 2367 .owner = THIS_MODULE,
1da177e4
LT
2368};
2369
9a812198
JV
2370/*
2371 * Generic Netlink interface
2372 */
2373
2374/* IPVS genetlink family */
2375static struct genl_family ip_vs_genl_family = {
2376 .id = GENL_ID_GENERATE,
2377 .hdrsize = 0,
2378 .name = IPVS_GENL_NAME,
2379 .version = IPVS_GENL_VERSION,
2380 .maxattr = IPVS_CMD_MAX,
2381};
2382
2383/* Policy used for first-level command attributes */
2384static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2385 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2386 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2387 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2388 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2389 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2390 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2391};
2392
2393/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2394static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2395 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2396 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2397 .len = IP_VS_IFNAME_MAXLEN },
2398 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2399};
2400
2401/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2402static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2403 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2404 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2405 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2406 .len = sizeof(union nf_inet_addr) },
2407 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2408 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2409 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2410 .len = IP_VS_SCHEDNAME_MAXLEN },
2411 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2412 .len = sizeof(struct ip_vs_flags) },
2413 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2414 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2415 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2416};
2417
2418/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2419static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2420 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2421 .len = sizeof(union nf_inet_addr) },
2422 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2423 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2424 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2425 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2426 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2427 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2428 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2429 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2430 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
2431};
2432
2433static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2434 struct ip_vs_stats *stats)
2435{
2436 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2437 if (!nl_stats)
2438 return -EMSGSIZE;
2439
2440 spin_lock_bh(&stats->lock);
2441
2442 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->conns);
2443 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->inpkts);
2444 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->outpkts);
2445 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->inbytes);
2446 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->outbytes);
2447 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->cps);
2448 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->inpps);
2449 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->outpps);
2450 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->inbps);
2451 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->outbps);
2452
2453 spin_unlock_bh(&stats->lock);
2454
2455 nla_nest_end(skb, nl_stats);
2456
2457 return 0;
2458
2459nla_put_failure:
2460 spin_unlock_bh(&stats->lock);
2461 nla_nest_cancel(skb, nl_stats);
2462 return -EMSGSIZE;
2463}
2464
2465static int ip_vs_genl_fill_service(struct sk_buff *skb,
2466 struct ip_vs_service *svc)
2467{
2468 struct nlattr *nl_service;
2469 struct ip_vs_flags flags = { .flags = svc->flags,
2470 .mask = ~0 };
2471
2472 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2473 if (!nl_service)
2474 return -EMSGSIZE;
2475
2476 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, AF_INET);
2477
2478 if (svc->fwmark) {
2479 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2480 } else {
2481 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2482 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2483 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2484 }
2485
2486 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2487 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2488 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2489 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2490
2491 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2492 goto nla_put_failure;
2493
2494 nla_nest_end(skb, nl_service);
2495
2496 return 0;
2497
2498nla_put_failure:
2499 nla_nest_cancel(skb, nl_service);
2500 return -EMSGSIZE;
2501}
2502
2503static int ip_vs_genl_dump_service(struct sk_buff *skb,
2504 struct ip_vs_service *svc,
2505 struct netlink_callback *cb)
2506{
2507 void *hdr;
2508
2509 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2510 &ip_vs_genl_family, NLM_F_MULTI,
2511 IPVS_CMD_NEW_SERVICE);
2512 if (!hdr)
2513 return -EMSGSIZE;
2514
2515 if (ip_vs_genl_fill_service(skb, svc) < 0)
2516 goto nla_put_failure;
2517
2518 return genlmsg_end(skb, hdr);
2519
2520nla_put_failure:
2521 genlmsg_cancel(skb, hdr);
2522 return -EMSGSIZE;
2523}
2524
2525static int ip_vs_genl_dump_services(struct sk_buff *skb,
2526 struct netlink_callback *cb)
2527{
2528 int idx = 0, i;
2529 int start = cb->args[0];
2530 struct ip_vs_service *svc;
2531
2532 mutex_lock(&__ip_vs_mutex);
2533 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2534 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2535 if (++idx <= start)
2536 continue;
2537 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2538 idx--;
2539 goto nla_put_failure;
2540 }
2541 }
2542 }
2543
2544 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2545 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2546 if (++idx <= start)
2547 continue;
2548 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2549 idx--;
2550 goto nla_put_failure;
2551 }
2552 }
2553 }
2554
2555nla_put_failure:
2556 mutex_unlock(&__ip_vs_mutex);
2557 cb->args[0] = idx;
2558
2559 return skb->len;
2560}
2561
c860c6b1 2562static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
9a812198
JV
2563 struct nlattr *nla, int full_entry)
2564{
2565 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2566 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2567
2568 /* Parse mandatory identifying service fields first */
2569 if (nla == NULL ||
2570 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2571 return -EINVAL;
2572
2573 nla_af = attrs[IPVS_SVC_ATTR_AF];
2574 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2575 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2576 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2577 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2578
2579 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2580 return -EINVAL;
2581
c860c6b1 2582 usvc->af = nla_get_u16(nla_af);
9a812198
JV
2583 /* For now, only support IPv4 */
2584 if (nla_get_u16(nla_af) != AF_INET)
2585 return -EAFNOSUPPORT;
2586
2587 if (nla_fwmark) {
2588 usvc->protocol = IPPROTO_TCP;
2589 usvc->fwmark = nla_get_u32(nla_fwmark);
2590 } else {
2591 usvc->protocol = nla_get_u16(nla_protocol);
2592 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2593 usvc->port = nla_get_u16(nla_port);
2594 usvc->fwmark = 0;
2595 }
2596
2597 /* If a full entry was requested, check for the additional fields */
2598 if (full_entry) {
2599 struct nlattr *nla_sched, *nla_flags, *nla_timeout,
2600 *nla_netmask;
2601 struct ip_vs_flags flags;
2602 struct ip_vs_service *svc;
2603
2604 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2605 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2606 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2607 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2608
2609 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2610 return -EINVAL;
2611
2612 nla_memcpy(&flags, nla_flags, sizeof(flags));
2613
2614 /* prefill flags from service if it already exists */
2615 if (usvc->fwmark)
2616 svc = __ip_vs_svc_fwm_get(usvc->fwmark);
2617 else
c860c6b1 2618 svc = __ip_vs_service_get(usvc->protocol, usvc->addr.ip,
9a812198
JV
2619 usvc->port);
2620 if (svc) {
2621 usvc->flags = svc->flags;
2622 ip_vs_service_put(svc);
2623 } else
2624 usvc->flags = 0;
2625
2626 /* set new flags from userland */
2627 usvc->flags = (usvc->flags & ~flags.mask) |
2628 (flags.flags & flags.mask);
c860c6b1 2629 usvc->sched_name = nla_data(nla_sched);
9a812198
JV
2630 usvc->timeout = nla_get_u32(nla_timeout);
2631 usvc->netmask = nla_get_u32(nla_netmask);
2632 }
2633
2634 return 0;
2635}
2636
2637static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2638{
c860c6b1 2639 struct ip_vs_service_user_kern usvc;
9a812198
JV
2640 int ret;
2641
2642 ret = ip_vs_genl_parse_service(&usvc, nla, 0);
2643 if (ret)
2644 return ERR_PTR(ret);
2645
2646 if (usvc.fwmark)
2647 return __ip_vs_svc_fwm_get(usvc.fwmark);
2648 else
c860c6b1 2649 return __ip_vs_service_get(usvc.protocol, usvc.addr.ip,
9a812198
JV
2650 usvc.port);
2651}
2652
2653static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2654{
2655 struct nlattr *nl_dest;
2656
2657 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2658 if (!nl_dest)
2659 return -EMSGSIZE;
2660
2661 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2662 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2663
2664 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2665 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2666 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2667 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2668 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2669 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2670 atomic_read(&dest->activeconns));
2671 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2672 atomic_read(&dest->inactconns));
2673 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2674 atomic_read(&dest->persistconns));
2675
2676 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2677 goto nla_put_failure;
2678
2679 nla_nest_end(skb, nl_dest);
2680
2681 return 0;
2682
2683nla_put_failure:
2684 nla_nest_cancel(skb, nl_dest);
2685 return -EMSGSIZE;
2686}
2687
2688static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2689 struct netlink_callback *cb)
2690{
2691 void *hdr;
2692
2693 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2694 &ip_vs_genl_family, NLM_F_MULTI,
2695 IPVS_CMD_NEW_DEST);
2696 if (!hdr)
2697 return -EMSGSIZE;
2698
2699 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2700 goto nla_put_failure;
2701
2702 return genlmsg_end(skb, hdr);
2703
2704nla_put_failure:
2705 genlmsg_cancel(skb, hdr);
2706 return -EMSGSIZE;
2707}
2708
2709static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2710 struct netlink_callback *cb)
2711{
2712 int idx = 0;
2713 int start = cb->args[0];
2714 struct ip_vs_service *svc;
2715 struct ip_vs_dest *dest;
2716 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2717
2718 mutex_lock(&__ip_vs_mutex);
2719
2720 /* Try to find the service for which to dump destinations */
2721 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2722 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2723 goto out_err;
2724
2725 svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
2726 if (IS_ERR(svc) || svc == NULL)
2727 goto out_err;
2728
2729 /* Dump the destinations */
2730 list_for_each_entry(dest, &svc->destinations, n_list) {
2731 if (++idx <= start)
2732 continue;
2733 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2734 idx--;
2735 goto nla_put_failure;
2736 }
2737 }
2738
2739nla_put_failure:
2740 cb->args[0] = idx;
2741 ip_vs_service_put(svc);
2742
2743out_err:
2744 mutex_unlock(&__ip_vs_mutex);
2745
2746 return skb->len;
2747}
2748
c860c6b1 2749static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
9a812198
JV
2750 struct nlattr *nla, int full_entry)
2751{
2752 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2753 struct nlattr *nla_addr, *nla_port;
2754
2755 /* Parse mandatory identifying destination fields first */
2756 if (nla == NULL ||
2757 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2758 return -EINVAL;
2759
2760 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
2761 nla_port = attrs[IPVS_DEST_ATTR_PORT];
2762
2763 if (!(nla_addr && nla_port))
2764 return -EINVAL;
2765
2766 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2767 udest->port = nla_get_u16(nla_port);
2768
2769 /* If a full entry was requested, check for the additional fields */
2770 if (full_entry) {
2771 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2772 *nla_l_thresh;
2773
2774 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
2775 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
2776 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
2777 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
2778
2779 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
2780 return -EINVAL;
2781
2782 udest->conn_flags = nla_get_u32(nla_fwd)
2783 & IP_VS_CONN_F_FWD_MASK;
2784 udest->weight = nla_get_u32(nla_weight);
2785 udest->u_threshold = nla_get_u32(nla_u_thresh);
2786 udest->l_threshold = nla_get_u32(nla_l_thresh);
2787 }
2788
2789 return 0;
2790}
2791
2792static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
2793 const char *mcast_ifn, __be32 syncid)
2794{
2795 struct nlattr *nl_daemon;
2796
2797 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
2798 if (!nl_daemon)
2799 return -EMSGSIZE;
2800
2801 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
2802 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
2803 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
2804
2805 nla_nest_end(skb, nl_daemon);
2806
2807 return 0;
2808
2809nla_put_failure:
2810 nla_nest_cancel(skb, nl_daemon);
2811 return -EMSGSIZE;
2812}
2813
2814static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
2815 const char *mcast_ifn, __be32 syncid,
2816 struct netlink_callback *cb)
2817{
2818 void *hdr;
2819 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2820 &ip_vs_genl_family, NLM_F_MULTI,
2821 IPVS_CMD_NEW_DAEMON);
2822 if (!hdr)
2823 return -EMSGSIZE;
2824
2825 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
2826 goto nla_put_failure;
2827
2828 return genlmsg_end(skb, hdr);
2829
2830nla_put_failure:
2831 genlmsg_cancel(skb, hdr);
2832 return -EMSGSIZE;
2833}
2834
2835static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
2836 struct netlink_callback *cb)
2837{
2838 mutex_lock(&__ip_vs_mutex);
2839 if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
2840 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
2841 ip_vs_master_mcast_ifn,
2842 ip_vs_master_syncid, cb) < 0)
2843 goto nla_put_failure;
2844
2845 cb->args[0] = 1;
2846 }
2847
2848 if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
2849 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
2850 ip_vs_backup_mcast_ifn,
2851 ip_vs_backup_syncid, cb) < 0)
2852 goto nla_put_failure;
2853
2854 cb->args[1] = 1;
2855 }
2856
2857nla_put_failure:
2858 mutex_unlock(&__ip_vs_mutex);
2859
2860 return skb->len;
2861}
2862
2863static int ip_vs_genl_new_daemon(struct nlattr **attrs)
2864{
2865 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
2866 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
2867 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
2868 return -EINVAL;
2869
2870 return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
2871 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
2872 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
2873}
2874
2875static int ip_vs_genl_del_daemon(struct nlattr **attrs)
2876{
2877 if (!attrs[IPVS_DAEMON_ATTR_STATE])
2878 return -EINVAL;
2879
2880 return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
2881}
2882
2883static int ip_vs_genl_set_config(struct nlattr **attrs)
2884{
2885 struct ip_vs_timeout_user t;
2886
2887 __ip_vs_get_timeouts(&t);
2888
2889 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
2890 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
2891
2892 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
2893 t.tcp_fin_timeout =
2894 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
2895
2896 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
2897 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
2898
2899 return ip_vs_set_timeout(&t);
2900}
2901
2902static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
2903{
2904 struct ip_vs_service *svc = NULL;
c860c6b1
JV
2905 struct ip_vs_service_user_kern usvc;
2906 struct ip_vs_dest_user_kern udest;
9a812198
JV
2907 int ret = 0, cmd;
2908 int need_full_svc = 0, need_full_dest = 0;
2909
2910 cmd = info->genlhdr->cmd;
2911
2912 mutex_lock(&__ip_vs_mutex);
2913
2914 if (cmd == IPVS_CMD_FLUSH) {
2915 ret = ip_vs_flush();
2916 goto out;
2917 } else if (cmd == IPVS_CMD_SET_CONFIG) {
2918 ret = ip_vs_genl_set_config(info->attrs);
2919 goto out;
2920 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
2921 cmd == IPVS_CMD_DEL_DAEMON) {
2922
2923 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
2924
2925 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
2926 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
2927 info->attrs[IPVS_CMD_ATTR_DAEMON],
2928 ip_vs_daemon_policy)) {
2929 ret = -EINVAL;
2930 goto out;
2931 }
2932
2933 if (cmd == IPVS_CMD_NEW_DAEMON)
2934 ret = ip_vs_genl_new_daemon(daemon_attrs);
2935 else
2936 ret = ip_vs_genl_del_daemon(daemon_attrs);
2937 goto out;
2938 } else if (cmd == IPVS_CMD_ZERO &&
2939 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
2940 ret = ip_vs_zero_all();
2941 goto out;
2942 }
2943
2944 /* All following commands require a service argument, so check if we
2945 * received a valid one. We need a full service specification when
2946 * adding / editing a service. Only identifying members otherwise. */
2947 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
2948 need_full_svc = 1;
2949
2950 ret = ip_vs_genl_parse_service(&usvc,
2951 info->attrs[IPVS_CMD_ATTR_SERVICE],
2952 need_full_svc);
2953 if (ret)
2954 goto out;
2955
2956 /* Lookup the exact service by <protocol, addr, port> or fwmark */
2957 if (usvc.fwmark == 0)
c860c6b1
JV
2958 svc = __ip_vs_service_get(usvc.protocol, usvc.addr.ip,
2959 usvc.port);
9a812198
JV
2960 else
2961 svc = __ip_vs_svc_fwm_get(usvc.fwmark);
2962
2963 /* Unless we're adding a new service, the service must already exist */
2964 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
2965 ret = -ESRCH;
2966 goto out;
2967 }
2968
2969 /* Destination commands require a valid destination argument. For
2970 * adding / editing a destination, we need a full destination
2971 * specification. */
2972 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
2973 cmd == IPVS_CMD_DEL_DEST) {
2974 if (cmd != IPVS_CMD_DEL_DEST)
2975 need_full_dest = 1;
2976
2977 ret = ip_vs_genl_parse_dest(&udest,
2978 info->attrs[IPVS_CMD_ATTR_DEST],
2979 need_full_dest);
2980 if (ret)
2981 goto out;
2982 }
2983
2984 switch (cmd) {
2985 case IPVS_CMD_NEW_SERVICE:
2986 if (svc == NULL)
2987 ret = ip_vs_add_service(&usvc, &svc);
2988 else
2989 ret = -EEXIST;
2990 break;
2991 case IPVS_CMD_SET_SERVICE:
2992 ret = ip_vs_edit_service(svc, &usvc);
2993 break;
2994 case IPVS_CMD_DEL_SERVICE:
2995 ret = ip_vs_del_service(svc);
2996 break;
2997 case IPVS_CMD_NEW_DEST:
2998 ret = ip_vs_add_dest(svc, &udest);
2999 break;
3000 case IPVS_CMD_SET_DEST:
3001 ret = ip_vs_edit_dest(svc, &udest);
3002 break;
3003 case IPVS_CMD_DEL_DEST:
3004 ret = ip_vs_del_dest(svc, &udest);
3005 break;
3006 case IPVS_CMD_ZERO:
3007 ret = ip_vs_zero_service(svc);
3008 break;
3009 default:
3010 ret = -EINVAL;
3011 }
3012
3013out:
3014 if (svc)
3015 ip_vs_service_put(svc);
3016 mutex_unlock(&__ip_vs_mutex);
3017
3018 return ret;
3019}
3020
3021static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3022{
3023 struct sk_buff *msg;
3024 void *reply;
3025 int ret, cmd, reply_cmd;
3026
3027 cmd = info->genlhdr->cmd;
3028
3029 if (cmd == IPVS_CMD_GET_SERVICE)
3030 reply_cmd = IPVS_CMD_NEW_SERVICE;
3031 else if (cmd == IPVS_CMD_GET_INFO)
3032 reply_cmd = IPVS_CMD_SET_INFO;
3033 else if (cmd == IPVS_CMD_GET_CONFIG)
3034 reply_cmd = IPVS_CMD_SET_CONFIG;
3035 else {
3036 IP_VS_ERR("unknown Generic Netlink command\n");
3037 return -EINVAL;
3038 }
3039
3040 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3041 if (!msg)
3042 return -ENOMEM;
3043
3044 mutex_lock(&__ip_vs_mutex);
3045
3046 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3047 if (reply == NULL)
3048 goto nla_put_failure;
3049
3050 switch (cmd) {
3051 case IPVS_CMD_GET_SERVICE:
3052 {
3053 struct ip_vs_service *svc;
3054
3055 svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
3056 if (IS_ERR(svc)) {
3057 ret = PTR_ERR(svc);
3058 goto out_err;
3059 } else if (svc) {
3060 ret = ip_vs_genl_fill_service(msg, svc);
3061 ip_vs_service_put(svc);
3062 if (ret)
3063 goto nla_put_failure;
3064 } else {
3065 ret = -ESRCH;
3066 goto out_err;
3067 }
3068
3069 break;
3070 }
3071
3072 case IPVS_CMD_GET_CONFIG:
3073 {
3074 struct ip_vs_timeout_user t;
3075
3076 __ip_vs_get_timeouts(&t);
3077#ifdef CONFIG_IP_VS_PROTO_TCP
3078 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3079 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3080 t.tcp_fin_timeout);
3081#endif
3082#ifdef CONFIG_IP_VS_PROTO_UDP
3083 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3084#endif
3085
3086 break;
3087 }
3088
3089 case IPVS_CMD_GET_INFO:
3090 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3091 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3092 IP_VS_CONN_TAB_SIZE);
3093 break;
3094 }
3095
3096 genlmsg_end(msg, reply);
3097 ret = genlmsg_unicast(msg, info->snd_pid);
3098 goto out;
3099
3100nla_put_failure:
3101 IP_VS_ERR("not enough space in Netlink message\n");
3102 ret = -EMSGSIZE;
3103
3104out_err:
3105 nlmsg_free(msg);
3106out:
3107 mutex_unlock(&__ip_vs_mutex);
3108
3109 return ret;
3110}
3111
3112
3113static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3114 {
3115 .cmd = IPVS_CMD_NEW_SERVICE,
3116 .flags = GENL_ADMIN_PERM,
3117 .policy = ip_vs_cmd_policy,
3118 .doit = ip_vs_genl_set_cmd,
3119 },
3120 {
3121 .cmd = IPVS_CMD_SET_SERVICE,
3122 .flags = GENL_ADMIN_PERM,
3123 .policy = ip_vs_cmd_policy,
3124 .doit = ip_vs_genl_set_cmd,
3125 },
3126 {
3127 .cmd = IPVS_CMD_DEL_SERVICE,
3128 .flags = GENL_ADMIN_PERM,
3129 .policy = ip_vs_cmd_policy,
3130 .doit = ip_vs_genl_set_cmd,
3131 },
3132 {
3133 .cmd = IPVS_CMD_GET_SERVICE,
3134 .flags = GENL_ADMIN_PERM,
3135 .doit = ip_vs_genl_get_cmd,
3136 .dumpit = ip_vs_genl_dump_services,
3137 .policy = ip_vs_cmd_policy,
3138 },
3139 {
3140 .cmd = IPVS_CMD_NEW_DEST,
3141 .flags = GENL_ADMIN_PERM,
3142 .policy = ip_vs_cmd_policy,
3143 .doit = ip_vs_genl_set_cmd,
3144 },
3145 {
3146 .cmd = IPVS_CMD_SET_DEST,
3147 .flags = GENL_ADMIN_PERM,
3148 .policy = ip_vs_cmd_policy,
3149 .doit = ip_vs_genl_set_cmd,
3150 },
3151 {
3152 .cmd = IPVS_CMD_DEL_DEST,
3153 .flags = GENL_ADMIN_PERM,
3154 .policy = ip_vs_cmd_policy,
3155 .doit = ip_vs_genl_set_cmd,
3156 },
3157 {
3158 .cmd = IPVS_CMD_GET_DEST,
3159 .flags = GENL_ADMIN_PERM,
3160 .policy = ip_vs_cmd_policy,
3161 .dumpit = ip_vs_genl_dump_dests,
3162 },
3163 {
3164 .cmd = IPVS_CMD_NEW_DAEMON,
3165 .flags = GENL_ADMIN_PERM,
3166 .policy = ip_vs_cmd_policy,
3167 .doit = ip_vs_genl_set_cmd,
3168 },
3169 {
3170 .cmd = IPVS_CMD_DEL_DAEMON,
3171 .flags = GENL_ADMIN_PERM,
3172 .policy = ip_vs_cmd_policy,
3173 .doit = ip_vs_genl_set_cmd,
3174 },
3175 {
3176 .cmd = IPVS_CMD_GET_DAEMON,
3177 .flags = GENL_ADMIN_PERM,
3178 .dumpit = ip_vs_genl_dump_daemons,
3179 },
3180 {
3181 .cmd = IPVS_CMD_SET_CONFIG,
3182 .flags = GENL_ADMIN_PERM,
3183 .policy = ip_vs_cmd_policy,
3184 .doit = ip_vs_genl_set_cmd,
3185 },
3186 {
3187 .cmd = IPVS_CMD_GET_CONFIG,
3188 .flags = GENL_ADMIN_PERM,
3189 .doit = ip_vs_genl_get_cmd,
3190 },
3191 {
3192 .cmd = IPVS_CMD_GET_INFO,
3193 .flags = GENL_ADMIN_PERM,
3194 .doit = ip_vs_genl_get_cmd,
3195 },
3196 {
3197 .cmd = IPVS_CMD_ZERO,
3198 .flags = GENL_ADMIN_PERM,
3199 .policy = ip_vs_cmd_policy,
3200 .doit = ip_vs_genl_set_cmd,
3201 },
3202 {
3203 .cmd = IPVS_CMD_FLUSH,
3204 .flags = GENL_ADMIN_PERM,
3205 .doit = ip_vs_genl_set_cmd,
3206 },
3207};
3208
3209static int __init ip_vs_genl_register(void)
3210{
3211 int ret, i;
3212
3213 ret = genl_register_family(&ip_vs_genl_family);
3214 if (ret)
3215 return ret;
3216
3217 for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
3218 ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
3219 if (ret)
3220 goto err_out;
3221 }
3222 return 0;
3223
3224err_out:
3225 genl_unregister_family(&ip_vs_genl_family);
3226 return ret;
3227}
3228
3229static void ip_vs_genl_unregister(void)
3230{
3231 genl_unregister_family(&ip_vs_genl_family);
3232}
3233
3234/* End of Generic Netlink interface definitions */
3235
1da177e4 3236
048cf48b 3237int __init ip_vs_control_init(void)
1da177e4
LT
3238{
3239 int ret;
3240 int idx;
3241
3242 EnterFunction(2);
3243
3244 ret = nf_register_sockopt(&ip_vs_sockopts);
3245 if (ret) {
3246 IP_VS_ERR("cannot register sockopt.\n");
3247 return ret;
3248 }
3249
9a812198
JV
3250 ret = ip_vs_genl_register();
3251 if (ret) {
3252 IP_VS_ERR("cannot register Generic Netlink interface.\n");
3253 nf_unregister_sockopt(&ip_vs_sockopts);
3254 return ret;
3255 }
3256
457c4cbc
EB
3257 proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
3258 proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
1da177e4 3259
90754f8e 3260 sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
1da177e4
LT
3261
3262 /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3263 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3264 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3265 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3266 }
3267 for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) {
3268 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3269 }
3270
1da177e4
LT
3271 ip_vs_new_estimator(&ip_vs_stats);
3272
3273 /* Hook the defense timer */
3274 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3275
3276 LeaveFunction(2);
3277 return 0;
3278}
3279
3280
3281void ip_vs_control_cleanup(void)
3282{
3283 EnterFunction(2);
3284 ip_vs_trash_cleanup();
3285 cancel_rearming_delayed_work(&defense_work);
28e53bdd 3286 cancel_work_sync(&defense_work.work);
1da177e4
LT
3287 ip_vs_kill_estimator(&ip_vs_stats);
3288 unregister_sysctl_table(sysctl_header);
457c4cbc
EB
3289 proc_net_remove(&init_net, "ip_vs_stats");
3290 proc_net_remove(&init_net, "ip_vs");
9a812198 3291 ip_vs_genl_unregister();
1da177e4
LT
3292 nf_unregister_sockopt(&ip_vs_sockopts);
3293 LeaveFunction(2);
3294}