]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/ipvs/ip_vs_ctl.c
[NET]: Add a network namespace parameter to struct sock
[net-next-2.6.git] / net / ipv4 / ipvs / ip_vs_ctl.c
CommitLineData
1da177e4
LT
1/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
8 * Version: $Id: ip_vs_ctl.c,v 1.36 2003/06/08 09:31:19 wensong Exp $
9 *
10 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
11 * Peter Kese <peter.kese@ijs.si>
12 * Julian Anastasov <ja@ssi.bg>
13 *
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version
17 * 2 of the License, or (at your option) any later version.
18 *
19 * Changes:
20 *
21 */
22
23#include <linux/module.h>
24#include <linux/init.h>
25#include <linux/types.h>
4fc268d2 26#include <linux/capability.h>
1da177e4
LT
27#include <linux/fs.h>
28#include <linux/sysctl.h>
29#include <linux/proc_fs.h>
30#include <linux/workqueue.h>
31#include <linux/swap.h>
1da177e4
LT
32#include <linux/seq_file.h>
33
34#include <linux/netfilter.h>
35#include <linux/netfilter_ipv4.h>
14cc3e2b 36#include <linux/mutex.h>
1da177e4
LT
37
38#include <net/ip.h>
14c85021 39#include <net/route.h>
1da177e4
LT
40#include <net/sock.h>
41
42#include <asm/uaccess.h>
43
44#include <net/ip_vs.h>
45
46/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
14cc3e2b 47static DEFINE_MUTEX(__ip_vs_mutex);
1da177e4
LT
48
49/* lock for service table */
50static DEFINE_RWLOCK(__ip_vs_svc_lock);
51
52/* lock for table with the real services */
53static DEFINE_RWLOCK(__ip_vs_rs_lock);
54
55/* lock for state and timeout tables */
56static DEFINE_RWLOCK(__ip_vs_securetcp_lock);
57
58/* lock for drop entry handling */
59static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
60
61/* lock for drop packet handling */
62static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
63
64/* 1/rate drop and drop-entry variables */
65int ip_vs_drop_rate = 0;
66int ip_vs_drop_counter = 0;
67static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
68
69/* number of virtual services */
70static int ip_vs_num_services = 0;
71
72/* sysctl variables */
73static int sysctl_ip_vs_drop_entry = 0;
74static int sysctl_ip_vs_drop_packet = 0;
75static int sysctl_ip_vs_secure_tcp = 0;
76static int sysctl_ip_vs_amemthresh = 1024;
77static int sysctl_ip_vs_am_droprate = 10;
78int sysctl_ip_vs_cache_bypass = 0;
79int sysctl_ip_vs_expire_nodest_conn = 0;
80int sysctl_ip_vs_expire_quiescent_template = 0;
81int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
82int sysctl_ip_vs_nat_icmp_send = 0;
83
84
#ifdef CONFIG_IP_VS_DEBUG
static int sysctl_ip_vs_debug_level = 0;

/* Accessor for the debug verbosity stored in sysctl_ip_vs_debug_level. */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
#endif
93
94/*
af9debd4
JA
95 * update_defense_level is called from keventd and from sysctl,
96 * so it needs to protect itself from softirqs
1da177e4
LT
97 */
98static void update_defense_level(void)
99{
100 struct sysinfo i;
101 static int old_secure_tcp = 0;
102 int availmem;
103 int nomem;
104 int to_change = -1;
105
106 /* we only count free and buffered memory (in pages) */
107 si_meminfo(&i);
108 availmem = i.freeram + i.bufferram;
109 /* however in linux 2.5 the i.bufferram is total page cache size,
110 we need adjust it */
111 /* si_swapinfo(&i); */
112 /* availmem = availmem - (i.totalswap - i.freeswap); */
113
114 nomem = (availmem < sysctl_ip_vs_amemthresh);
115
af9debd4
JA
116 local_bh_disable();
117
1da177e4
LT
118 /* drop_entry */
119 spin_lock(&__ip_vs_dropentry_lock);
120 switch (sysctl_ip_vs_drop_entry) {
121 case 0:
122 atomic_set(&ip_vs_dropentry, 0);
123 break;
124 case 1:
125 if (nomem) {
126 atomic_set(&ip_vs_dropentry, 1);
127 sysctl_ip_vs_drop_entry = 2;
128 } else {
129 atomic_set(&ip_vs_dropentry, 0);
130 }
131 break;
132 case 2:
133 if (nomem) {
134 atomic_set(&ip_vs_dropentry, 1);
135 } else {
136 atomic_set(&ip_vs_dropentry, 0);
137 sysctl_ip_vs_drop_entry = 1;
138 };
139 break;
140 case 3:
141 atomic_set(&ip_vs_dropentry, 1);
142 break;
143 }
144 spin_unlock(&__ip_vs_dropentry_lock);
145
146 /* drop_packet */
147 spin_lock(&__ip_vs_droppacket_lock);
148 switch (sysctl_ip_vs_drop_packet) {
149 case 0:
150 ip_vs_drop_rate = 0;
151 break;
152 case 1:
153 if (nomem) {
154 ip_vs_drop_rate = ip_vs_drop_counter
155 = sysctl_ip_vs_amemthresh /
156 (sysctl_ip_vs_amemthresh-availmem);
157 sysctl_ip_vs_drop_packet = 2;
158 } else {
159 ip_vs_drop_rate = 0;
160 }
161 break;
162 case 2:
163 if (nomem) {
164 ip_vs_drop_rate = ip_vs_drop_counter
165 = sysctl_ip_vs_amemthresh /
166 (sysctl_ip_vs_amemthresh-availmem);
167 } else {
168 ip_vs_drop_rate = 0;
169 sysctl_ip_vs_drop_packet = 1;
170 }
171 break;
172 case 3:
173 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
174 break;
175 }
176 spin_unlock(&__ip_vs_droppacket_lock);
177
178 /* secure_tcp */
179 write_lock(&__ip_vs_securetcp_lock);
180 switch (sysctl_ip_vs_secure_tcp) {
181 case 0:
182 if (old_secure_tcp >= 2)
183 to_change = 0;
184 break;
185 case 1:
186 if (nomem) {
187 if (old_secure_tcp < 2)
188 to_change = 1;
189 sysctl_ip_vs_secure_tcp = 2;
190 } else {
191 if (old_secure_tcp >= 2)
192 to_change = 0;
193 }
194 break;
195 case 2:
196 if (nomem) {
197 if (old_secure_tcp < 2)
198 to_change = 1;
199 } else {
200 if (old_secure_tcp >= 2)
201 to_change = 0;
202 sysctl_ip_vs_secure_tcp = 1;
203 }
204 break;
205 case 3:
206 if (old_secure_tcp < 2)
207 to_change = 1;
208 break;
209 }
210 old_secure_tcp = sysctl_ip_vs_secure_tcp;
211 if (to_change >= 0)
212 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
213 write_unlock(&__ip_vs_securetcp_lock);
af9debd4
JA
214
215 local_bh_enable();
1da177e4
LT
216}
217
218
219/*
220 * Timer for checking the defense
221 */
222#define DEFENSE_TIMER_PERIOD 1*HZ
c4028958
DH
223static void defense_work_handler(struct work_struct *work);
224static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
1da177e4 225
c4028958 226static void defense_work_handler(struct work_struct *work)
1da177e4
LT
227{
228 update_defense_level();
229 if (atomic_read(&ip_vs_dropentry))
230 ip_vs_random_dropentry();
231
232 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
233}
234
235int
236ip_vs_use_count_inc(void)
237{
238 return try_module_get(THIS_MODULE);
239}
240
241void
242ip_vs_use_count_dec(void)
243{
244 module_put(THIS_MODULE);
245}
246
247
248/*
249 * Hash table: for virtual service lookups
250 */
251#define IP_VS_SVC_TAB_BITS 8
252#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
253#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
254
255/* the service table hashed by <protocol, addr, port> */
256static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
257/* the service table hashed by fwmark */
258static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
259
260/*
261 * Hash table: for real service lookups
262 */
263#define IP_VS_RTAB_BITS 4
264#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
265#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
266
267static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
268
269/*
270 * Trash for destinations
271 */
272static LIST_HEAD(ip_vs_dest_trash);
273
274/*
275 * FTP & NULL virtual service counters
276 */
277static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
278static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
279
280
281/*
282 * Returns hash value for virtual service
283 */
284static __inline__ unsigned
014d730d 285ip_vs_svc_hashkey(unsigned proto, __be32 addr, __be16 port)
1da177e4
LT
286{
287 register unsigned porth = ntohs(port);
288
289 return (proto^ntohl(addr)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
290 & IP_VS_SVC_TAB_MASK;
291}
292
293/*
294 * Returns hash value of fwmark for virtual service lookup
295 */
296static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
297{
298 return fwmark & IP_VS_SVC_TAB_MASK;
299}
300
301/*
302 * Hashes a service in the ip_vs_svc_table by <proto,addr,port>
303 * or in the ip_vs_svc_fwm_table by fwmark.
304 * Should be called with locked tables.
305 */
306static int ip_vs_svc_hash(struct ip_vs_service *svc)
307{
308 unsigned hash;
309
310 if (svc->flags & IP_VS_SVC_F_HASHED) {
311 IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
312 "called from %p\n", __builtin_return_address(0));
313 return 0;
314 }
315
316 if (svc->fwmark == 0) {
317 /*
318 * Hash it by <protocol,addr,port> in ip_vs_svc_table
319 */
320 hash = ip_vs_svc_hashkey(svc->protocol, svc->addr, svc->port);
321 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
322 } else {
323 /*
324 * Hash it by fwmark in ip_vs_svc_fwm_table
325 */
326 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
327 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
328 }
329
330 svc->flags |= IP_VS_SVC_F_HASHED;
331 /* increase its refcnt because it is referenced by the svc table */
332 atomic_inc(&svc->refcnt);
333 return 1;
334}
335
336
337/*
338 * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
339 * Should be called with locked tables.
340 */
341static int ip_vs_svc_unhash(struct ip_vs_service *svc)
342{
343 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
344 IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
345 "called from %p\n", __builtin_return_address(0));
346 return 0;
347 }
348
349 if (svc->fwmark == 0) {
350 /* Remove it from the ip_vs_svc_table table */
351 list_del(&svc->s_list);
352 } else {
353 /* Remove it from the ip_vs_svc_fwm_table table */
354 list_del(&svc->f_list);
355 }
356
357 svc->flags &= ~IP_VS_SVC_F_HASHED;
358 atomic_dec(&svc->refcnt);
359 return 1;
360}
361
362
363/*
364 * Get service by {proto,addr,port} in the service table.
365 */
366static __inline__ struct ip_vs_service *
014d730d 367__ip_vs_service_get(__u16 protocol, __be32 vaddr, __be16 vport)
1da177e4
LT
368{
369 unsigned hash;
370 struct ip_vs_service *svc;
371
372 /* Check for "full" addressed entries */
373 hash = ip_vs_svc_hashkey(protocol, vaddr, vport);
374
375 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
376 if ((svc->addr == vaddr)
377 && (svc->port == vport)
378 && (svc->protocol == protocol)) {
379 /* HIT */
380 atomic_inc(&svc->usecnt);
381 return svc;
382 }
383 }
384
385 return NULL;
386}
387
388
389/*
390 * Get service by {fwmark} in the service table.
391 */
392static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark)
393{
394 unsigned hash;
395 struct ip_vs_service *svc;
396
397 /* Check for fwmark addressed entries */
398 hash = ip_vs_svc_fwm_hashkey(fwmark);
399
400 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
401 if (svc->fwmark == fwmark) {
402 /* HIT */
403 atomic_inc(&svc->usecnt);
404 return svc;
405 }
406 }
407
408 return NULL;
409}
410
411struct ip_vs_service *
014d730d 412ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport)
1da177e4
LT
413{
414 struct ip_vs_service *svc;
415
416 read_lock(&__ip_vs_svc_lock);
417
418 /*
419 * Check the table hashed by fwmark first
420 */
421 if (fwmark && (svc = __ip_vs_svc_fwm_get(fwmark)))
422 goto out;
423
424 /*
425 * Check the table hashed by <protocol,addr,port>
426 * for "full" addressed entries
427 */
428 svc = __ip_vs_service_get(protocol, vaddr, vport);
429
430 if (svc == NULL
431 && protocol == IPPROTO_TCP
432 && atomic_read(&ip_vs_ftpsvc_counter)
433 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
434 /*
435 * Check if ftp service entry exists, the packet
436 * might belong to FTP data connections.
437 */
438 svc = __ip_vs_service_get(protocol, vaddr, FTPPORT);
439 }
440
441 if (svc == NULL
442 && atomic_read(&ip_vs_nullsvc_counter)) {
443 /*
444 * Check if the catch-all port (port zero) exists
445 */
446 svc = __ip_vs_service_get(protocol, vaddr, 0);
447 }
448
449 out:
450 read_unlock(&__ip_vs_svc_lock);
451
4b5bdf5c 452 IP_VS_DBG(9, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n",
1da177e4
LT
453 fwmark, ip_vs_proto_name(protocol),
454 NIPQUAD(vaddr), ntohs(vport),
455 svc?"hit":"not hit");
456
457 return svc;
458}
459
460
461static inline void
462__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
463{
464 atomic_inc(&svc->refcnt);
465 dest->svc = svc;
466}
467
468static inline void
469__ip_vs_unbind_svc(struct ip_vs_dest *dest)
470{
471 struct ip_vs_service *svc = dest->svc;
472
473 dest->svc = NULL;
474 if (atomic_dec_and_test(&svc->refcnt))
475 kfree(svc);
476}
477
478
479/*
480 * Returns hash value for real service
481 */
014d730d 482static __inline__ unsigned ip_vs_rs_hashkey(__be32 addr, __be16 port)
1da177e4
LT
483{
484 register unsigned porth = ntohs(port);
485
486 return (ntohl(addr)^(porth>>IP_VS_RTAB_BITS)^porth)
487 & IP_VS_RTAB_MASK;
488}
489
490/*
491 * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
492 * should be called with locked tables.
493 */
494static int ip_vs_rs_hash(struct ip_vs_dest *dest)
495{
496 unsigned hash;
497
498 if (!list_empty(&dest->d_list)) {
499 return 0;
500 }
501
502 /*
503 * Hash by proto,addr,port,
504 * which are the parameters of the real service.
505 */
506 hash = ip_vs_rs_hashkey(dest->addr, dest->port);
507 list_add(&dest->d_list, &ip_vs_rtable[hash]);
508
509 return 1;
510}
511
512/*
513 * UNhashes ip_vs_dest from ip_vs_rtable.
514 * should be called with locked tables.
515 */
516static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
517{
518 /*
519 * Remove it from the ip_vs_rtable table.
520 */
521 if (!list_empty(&dest->d_list)) {
522 list_del(&dest->d_list);
523 INIT_LIST_HEAD(&dest->d_list);
524 }
525
526 return 1;
527}
528
529/*
530 * Lookup real service by <proto,addr,port> in the real service table.
531 */
532struct ip_vs_dest *
014d730d 533ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport)
1da177e4
LT
534{
535 unsigned hash;
536 struct ip_vs_dest *dest;
537
538 /*
539 * Check for "full" addressed entries
540 * Return the first found entry
541 */
542 hash = ip_vs_rs_hashkey(daddr, dport);
543
544 read_lock(&__ip_vs_rs_lock);
545 list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
546 if ((dest->addr == daddr)
547 && (dest->port == dport)
548 && ((dest->protocol == protocol) ||
549 dest->vfwmark)) {
550 /* HIT */
551 read_unlock(&__ip_vs_rs_lock);
552 return dest;
553 }
554 }
555 read_unlock(&__ip_vs_rs_lock);
556
557 return NULL;
558}
559
560/*
561 * Lookup destination by {addr,port} in the given service
562 */
563static struct ip_vs_dest *
014d730d 564ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
1da177e4
LT
565{
566 struct ip_vs_dest *dest;
567
568 /*
569 * Find the destination for the given service
570 */
571 list_for_each_entry(dest, &svc->destinations, n_list) {
572 if ((dest->addr == daddr) && (dest->port == dport)) {
573 /* HIT */
574 return dest;
575 }
576 }
577
578 return NULL;
579}
580
581
582/*
583 * Lookup dest by {svc,addr,port} in the destination trash.
584 * The destination trash is used to hold the destinations that are removed
585 * from the service table but are still referenced by some conn entries.
586 * The reason to add the destination trash is when the dest is temporary
587 * down (either by administrator or by monitor program), the dest can be
588 * picked back from the trash, the remaining connections to the dest can
589 * continue, and the counting information of the dest is also useful for
590 * scheduling.
591 */
592static struct ip_vs_dest *
014d730d 593ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
1da177e4
LT
594{
595 struct ip_vs_dest *dest, *nxt;
596
597 /*
598 * Find the destination in trash
599 */
600 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
601 IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, "
4b5bdf5c 602 "dest->refcnt=%d\n",
1da177e4
LT
603 dest->vfwmark,
604 NIPQUAD(dest->addr), ntohs(dest->port),
605 atomic_read(&dest->refcnt));
606 if (dest->addr == daddr &&
607 dest->port == dport &&
608 dest->vfwmark == svc->fwmark &&
609 dest->protocol == svc->protocol &&
610 (svc->fwmark ||
611 (dest->vaddr == svc->addr &&
612 dest->vport == svc->port))) {
613 /* HIT */
614 return dest;
615 }
616
617 /*
618 * Try to purge the destination from trash if not referenced
619 */
620 if (atomic_read(&dest->refcnt) == 1) {
621 IP_VS_DBG(3, "Removing destination %u/%u.%u.%u.%u:%u "
622 "from trash\n",
623 dest->vfwmark,
624 NIPQUAD(dest->addr), ntohs(dest->port));
625 list_del(&dest->n_list);
626 ip_vs_dst_reset(dest);
627 __ip_vs_unbind_svc(dest);
628 kfree(dest);
629 }
630 }
631
632 return NULL;
633}
634
635
636/*
637 * Clean up all the destinations in the trash
638 * Called by the ip_vs_control_cleanup()
639 *
640 * When the ip_vs_control_clearup is activated by ipvs module exit,
641 * the service tables must have been flushed and all the connections
642 * are expired, and the refcnt of each destination in the trash must
643 * be 1, so we simply release them here.
644 */
645static void ip_vs_trash_cleanup(void)
646{
647 struct ip_vs_dest *dest, *nxt;
648
649 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
650 list_del(&dest->n_list);
651 ip_vs_dst_reset(dest);
652 __ip_vs_unbind_svc(dest);
653 kfree(dest);
654 }
655}
656
657
658static void
659ip_vs_zero_stats(struct ip_vs_stats *stats)
660{
661 spin_lock_bh(&stats->lock);
662 memset(stats, 0, (char *)&stats->lock - (char *)stats);
663 spin_unlock_bh(&stats->lock);
664 ip_vs_zero_estimator(stats);
665}
666
667/*
668 * Update a destination in the given service
669 */
670static void
671__ip_vs_update_dest(struct ip_vs_service *svc,
672 struct ip_vs_dest *dest, struct ip_vs_dest_user *udest)
673{
674 int conn_flags;
675
676 /* set the weight and the flags */
677 atomic_set(&dest->weight, udest->weight);
678 conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
679
680 /* check if local node and update the flags */
681 if (inet_addr_type(udest->addr) == RTN_LOCAL) {
682 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
683 | IP_VS_CONN_F_LOCALNODE;
684 }
685
686 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
687 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
688 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
689 } else {
690 /*
691 * Put the real service in ip_vs_rtable if not present.
692 * For now only for NAT!
693 */
694 write_lock_bh(&__ip_vs_rs_lock);
695 ip_vs_rs_hash(dest);
696 write_unlock_bh(&__ip_vs_rs_lock);
697 }
698 atomic_set(&dest->conn_flags, conn_flags);
699
700 /* bind the service */
701 if (!dest->svc) {
702 __ip_vs_bind_svc(dest, svc);
703 } else {
704 if (dest->svc != svc) {
705 __ip_vs_unbind_svc(dest);
706 ip_vs_zero_stats(&dest->stats);
707 __ip_vs_bind_svc(dest, svc);
708 }
709 }
710
711 /* set the dest status flags */
712 dest->flags |= IP_VS_DEST_F_AVAILABLE;
713
714 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
715 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
716 dest->u_threshold = udest->u_threshold;
717 dest->l_threshold = udest->l_threshold;
718}
719
720
721/*
722 * Create a destination for the given service
723 */
724static int
725ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest,
726 struct ip_vs_dest **dest_p)
727{
728 struct ip_vs_dest *dest;
729 unsigned atype;
730
731 EnterFunction(2);
732
733 atype = inet_addr_type(udest->addr);
734 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
735 return -EINVAL;
736
0da974f4 737 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
1da177e4
LT
738 if (dest == NULL) {
739 IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
740 return -ENOMEM;
741 }
1da177e4
LT
742
743 dest->protocol = svc->protocol;
744 dest->vaddr = svc->addr;
745 dest->vport = svc->port;
746 dest->vfwmark = svc->fwmark;
747 dest->addr = udest->addr;
748 dest->port = udest->port;
749
750 atomic_set(&dest->activeconns, 0);
751 atomic_set(&dest->inactconns, 0);
752 atomic_set(&dest->persistconns, 0);
753 atomic_set(&dest->refcnt, 0);
754
755 INIT_LIST_HEAD(&dest->d_list);
756 spin_lock_init(&dest->dst_lock);
757 spin_lock_init(&dest->stats.lock);
758 __ip_vs_update_dest(svc, dest, udest);
759 ip_vs_new_estimator(&dest->stats);
760
761 *dest_p = dest;
762
763 LeaveFunction(2);
764 return 0;
765}
766
767
768/*
769 * Add a destination into an existing service
770 */
771static int
772ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
773{
774 struct ip_vs_dest *dest;
014d730d
AV
775 __be32 daddr = udest->addr;
776 __be16 dport = udest->port;
1da177e4
LT
777 int ret;
778
779 EnterFunction(2);
780
781 if (udest->weight < 0) {
782 IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
783 return -ERANGE;
784 }
785
786 if (udest->l_threshold > udest->u_threshold) {
787 IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than "
788 "upper threshold\n");
789 return -ERANGE;
790 }
791
792 /*
793 * Check if the dest already exists in the list
794 */
795 dest = ip_vs_lookup_dest(svc, daddr, dport);
796 if (dest != NULL) {
797 IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
798 return -EEXIST;
799 }
800
801 /*
802 * Check if the dest already exists in the trash and
803 * is from the same service
804 */
805 dest = ip_vs_trash_get_dest(svc, daddr, dport);
806 if (dest != NULL) {
807 IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
4b5bdf5c 808 "dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
1da177e4
LT
809 NIPQUAD(daddr), ntohs(dport),
810 atomic_read(&dest->refcnt),
811 dest->vfwmark,
812 NIPQUAD(dest->vaddr),
813 ntohs(dest->vport));
814 __ip_vs_update_dest(svc, dest, udest);
815
816 /*
817 * Get the destination from the trash
818 */
819 list_del(&dest->n_list);
820
821 ip_vs_new_estimator(&dest->stats);
822
823 write_lock_bh(&__ip_vs_svc_lock);
824
825 /*
826 * Wait until all other svc users go away.
827 */
828 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
829
830 list_add(&dest->n_list, &svc->destinations);
831 svc->num_dests++;
832
833 /* call the update_service function of its scheduler */
834 svc->scheduler->update_service(svc);
835
836 write_unlock_bh(&__ip_vs_svc_lock);
837 return 0;
838 }
839
840 /*
841 * Allocate and initialize the dest structure
842 */
843 ret = ip_vs_new_dest(svc, udest, &dest);
844 if (ret) {
845 return ret;
846 }
847
848 /*
849 * Add the dest entry into the list
850 */
851 atomic_inc(&dest->refcnt);
852
853 write_lock_bh(&__ip_vs_svc_lock);
854
855 /*
856 * Wait until all other svc users go away.
857 */
858 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
859
860 list_add(&dest->n_list, &svc->destinations);
861 svc->num_dests++;
862
863 /* call the update_service function of its scheduler */
864 svc->scheduler->update_service(svc);
865
866 write_unlock_bh(&__ip_vs_svc_lock);
867
868 LeaveFunction(2);
869
870 return 0;
871}
872
873
874/*
875 * Edit a destination in the given service
876 */
877static int
878ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
879{
880 struct ip_vs_dest *dest;
014d730d
AV
881 __be32 daddr = udest->addr;
882 __be16 dport = udest->port;
1da177e4
LT
883
884 EnterFunction(2);
885
886 if (udest->weight < 0) {
887 IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
888 return -ERANGE;
889 }
890
891 if (udest->l_threshold > udest->u_threshold) {
892 IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than "
893 "upper threshold\n");
894 return -ERANGE;
895 }
896
897 /*
898 * Lookup the destination list
899 */
900 dest = ip_vs_lookup_dest(svc, daddr, dport);
901 if (dest == NULL) {
902 IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
903 return -ENOENT;
904 }
905
906 __ip_vs_update_dest(svc, dest, udest);
907
908 write_lock_bh(&__ip_vs_svc_lock);
909
910 /* Wait until all other svc users go away */
cae7ca3d 911 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1da177e4
LT
912
913 /* call the update_service, because server weight may be changed */
914 svc->scheduler->update_service(svc);
915
916 write_unlock_bh(&__ip_vs_svc_lock);
917
918 LeaveFunction(2);
919
920 return 0;
921}
922
923
924/*
925 * Delete a destination (must be already unlinked from the service)
926 */
927static void __ip_vs_del_dest(struct ip_vs_dest *dest)
928{
929 ip_vs_kill_estimator(&dest->stats);
930
931 /*
932 * Remove it from the d-linked list with the real services.
933 */
934 write_lock_bh(&__ip_vs_rs_lock);
935 ip_vs_rs_unhash(dest);
936 write_unlock_bh(&__ip_vs_rs_lock);
937
938 /*
939 * Decrease the refcnt of the dest, and free the dest
940 * if nobody refers to it (refcnt=0). Otherwise, throw
941 * the destination into the trash.
942 */
943 if (atomic_dec_and_test(&dest->refcnt)) {
944 ip_vs_dst_reset(dest);
945 /* simply decrease svc->refcnt here, let the caller check
946 and release the service if nobody refers to it.
947 Only user context can release destination and service,
948 and only one user context can update virtual service at a
949 time, so the operation here is OK */
950 atomic_dec(&dest->svc->refcnt);
951 kfree(dest);
952 } else {
4b5bdf5c
RN
953 IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, "
954 "dest->refcnt=%d\n",
1da177e4
LT
955 NIPQUAD(dest->addr), ntohs(dest->port),
956 atomic_read(&dest->refcnt));
957 list_add(&dest->n_list, &ip_vs_dest_trash);
958 atomic_inc(&dest->refcnt);
959 }
960}
961
962
963/*
964 * Unlink a destination from the given service
965 */
966static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
967 struct ip_vs_dest *dest,
968 int svcupd)
969{
970 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
971
972 /*
973 * Remove it from the d-linked destination list.
974 */
975 list_del(&dest->n_list);
976 svc->num_dests--;
977 if (svcupd) {
978 /*
979 * Call the update_service function of its scheduler
980 */
981 svc->scheduler->update_service(svc);
982 }
983}
984
985
986/*
987 * Delete a destination server in the given service
988 */
989static int
990ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_dest_user *udest)
991{
992 struct ip_vs_dest *dest;
014d730d
AV
993 __be32 daddr = udest->addr;
994 __be16 dport = udest->port;
1da177e4
LT
995
996 EnterFunction(2);
997
998 dest = ip_vs_lookup_dest(svc, daddr, dport);
999 if (dest == NULL) {
1000 IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
1001 return -ENOENT;
1002 }
1003
1004 write_lock_bh(&__ip_vs_svc_lock);
1005
1006 /*
1007 * Wait until all other svc users go away.
1008 */
1009 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1010
1011 /*
1012 * Unlink dest from the service
1013 */
1014 __ip_vs_unlink_dest(svc, dest, 1);
1015
1016 write_unlock_bh(&__ip_vs_svc_lock);
1017
1018 /*
1019 * Delete the destination
1020 */
1021 __ip_vs_del_dest(dest);
1022
1023 LeaveFunction(2);
1024
1025 return 0;
1026}
1027
1028
1029/*
1030 * Add a service into the service hash table
1031 */
1032static int
1033ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p)
1034{
1035 int ret = 0;
1036 struct ip_vs_scheduler *sched = NULL;
1037 struct ip_vs_service *svc = NULL;
1038
1039 /* increase the module use count */
1040 ip_vs_use_count_inc();
1041
1042 /* Lookup the scheduler by 'u->sched_name' */
1043 sched = ip_vs_scheduler_get(u->sched_name);
1044 if (sched == NULL) {
1045 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1046 u->sched_name);
1047 ret = -ENOENT;
1048 goto out_mod_dec;
1049 }
1050
0da974f4 1051 svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
1da177e4
LT
1052 if (svc == NULL) {
1053 IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
1054 ret = -ENOMEM;
1055 goto out_err;
1056 }
1da177e4
LT
1057
1058 /* I'm the first user of the service */
1059 atomic_set(&svc->usecnt, 1);
1060 atomic_set(&svc->refcnt, 0);
1061
1062 svc->protocol = u->protocol;
1063 svc->addr = u->addr;
1064 svc->port = u->port;
1065 svc->fwmark = u->fwmark;
1066 svc->flags = u->flags;
1067 svc->timeout = u->timeout * HZ;
1068 svc->netmask = u->netmask;
1069
1070 INIT_LIST_HEAD(&svc->destinations);
1071 rwlock_init(&svc->sched_lock);
1072 spin_lock_init(&svc->stats.lock);
1073
1074 /* Bind the scheduler */
1075 ret = ip_vs_bind_scheduler(svc, sched);
1076 if (ret)
1077 goto out_err;
1078 sched = NULL;
1079
1080 /* Update the virtual service counters */
1081 if (svc->port == FTPPORT)
1082 atomic_inc(&ip_vs_ftpsvc_counter);
1083 else if (svc->port == 0)
1084 atomic_inc(&ip_vs_nullsvc_counter);
1085
1086 ip_vs_new_estimator(&svc->stats);
1087 ip_vs_num_services++;
1088
1089 /* Hash the service into the service table */
1090 write_lock_bh(&__ip_vs_svc_lock);
1091 ip_vs_svc_hash(svc);
1092 write_unlock_bh(&__ip_vs_svc_lock);
1093
1094 *svc_p = svc;
1095 return 0;
1096
1097 out_err:
1098 if (svc != NULL) {
1099 if (svc->scheduler)
1100 ip_vs_unbind_scheduler(svc);
1101 if (svc->inc) {
1102 local_bh_disable();
1103 ip_vs_app_inc_put(svc->inc);
1104 local_bh_enable();
1105 }
1106 kfree(svc);
1107 }
1108 ip_vs_scheduler_put(sched);
1109
1110 out_mod_dec:
1111 /* decrease the module use count */
1112 ip_vs_use_count_dec();
1113
1114 return ret;
1115}
1116
1117
1118/*
1119 * Edit a service and bind it with a new scheduler
1120 */
1121static int
1122ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user *u)
1123{
1124 struct ip_vs_scheduler *sched, *old_sched;
1125 int ret = 0;
1126
1127 /*
1128 * Lookup the scheduler, by 'u->sched_name'
1129 */
1130 sched = ip_vs_scheduler_get(u->sched_name);
1131 if (sched == NULL) {
1132 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1133 u->sched_name);
1134 return -ENOENT;
1135 }
1136 old_sched = sched;
1137
1138 write_lock_bh(&__ip_vs_svc_lock);
1139
1140 /*
1141 * Wait until all other svc users go away.
1142 */
1143 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1144
1145 /*
1146 * Set the flags and timeout value
1147 */
1148 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1149 svc->timeout = u->timeout * HZ;
1150 svc->netmask = u->netmask;
1151
1152 old_sched = svc->scheduler;
1153 if (sched != old_sched) {
1154 /*
1155 * Unbind the old scheduler
1156 */
1157 if ((ret = ip_vs_unbind_scheduler(svc))) {
1158 old_sched = sched;
1159 goto out;
1160 }
1161
1162 /*
1163 * Bind the new scheduler
1164 */
1165 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1166 /*
1167 * If ip_vs_bind_scheduler fails, restore the old
1168 * scheduler.
1169 * The main reason of failure is out of memory.
1170 *
1171 * The question is if the old scheduler can be
1172 * restored all the time. TODO: if it cannot be
1173 * restored some time, we must delete the service,
1174 * otherwise the system may crash.
1175 */
1176 ip_vs_bind_scheduler(svc, old_sched);
1177 old_sched = sched;
1178 goto out;
1179 }
1180 }
1181
1182 out:
1183 write_unlock_bh(&__ip_vs_svc_lock);
1184
1185 if (old_sched)
1186 ip_vs_scheduler_put(old_sched);
1187
1188 return ret;
1189}
1190
1191
1192/*
1193 * Delete a service from the service list
1194 * - The service must be unlinked, unlocked and not referenced!
1195 * - We are called under _bh lock
1196 */
1197static void __ip_vs_del_service(struct ip_vs_service *svc)
1198{
1199 struct ip_vs_dest *dest, *nxt;
1200 struct ip_vs_scheduler *old_sched;
1201
1202 ip_vs_num_services--;
1203 ip_vs_kill_estimator(&svc->stats);
1204
1205 /* Unbind scheduler */
1206 old_sched = svc->scheduler;
1207 ip_vs_unbind_scheduler(svc);
1208 if (old_sched)
1209 ip_vs_scheduler_put(old_sched);
1210
1211 /* Unbind app inc */
1212 if (svc->inc) {
1213 ip_vs_app_inc_put(svc->inc);
1214 svc->inc = NULL;
1215 }
1216
1217 /*
1218 * Unlink the whole destination list
1219 */
1220 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1221 __ip_vs_unlink_dest(svc, dest, 0);
1222 __ip_vs_del_dest(dest);
1223 }
1224
1225 /*
1226 * Update the virtual service counters
1227 */
1228 if (svc->port == FTPPORT)
1229 atomic_dec(&ip_vs_ftpsvc_counter);
1230 else if (svc->port == 0)
1231 atomic_dec(&ip_vs_nullsvc_counter);
1232
1233 /*
1234 * Free the service if nobody refers to it
1235 */
1236 if (atomic_read(&svc->refcnt) == 0)
1237 kfree(svc);
1238
1239 /* decrease the module use count */
1240 ip_vs_use_count_dec();
1241}
1242
1243/*
1244 * Delete a service from the service list
1245 */
1246static int ip_vs_del_service(struct ip_vs_service *svc)
1247{
1248 if (svc == NULL)
1249 return -EEXIST;
1250
1251 /*
1252 * Unhash it from the service table
1253 */
1254 write_lock_bh(&__ip_vs_svc_lock);
1255
1256 ip_vs_svc_unhash(svc);
1257
1258 /*
1259 * Wait until all the svc users go away.
1260 */
1261 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1262
1263 __ip_vs_del_service(svc);
1264
1265 write_unlock_bh(&__ip_vs_svc_lock);
1266
1267 return 0;
1268}
1269
1270
1271/*
1272 * Flush all the virtual services
1273 */
1274static int ip_vs_flush(void)
1275{
1276 int idx;
1277 struct ip_vs_service *svc, *nxt;
1278
1279 /*
1280 * Flush the service table hashed by <protocol,addr,port>
1281 */
1282 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1283 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1284 write_lock_bh(&__ip_vs_svc_lock);
1285 ip_vs_svc_unhash(svc);
1286 /*
1287 * Wait until all the svc users go away.
1288 */
1289 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1290 __ip_vs_del_service(svc);
1291 write_unlock_bh(&__ip_vs_svc_lock);
1292 }
1293 }
1294
1295 /*
1296 * Flush the service table hashed by fwmark
1297 */
1298 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1299 list_for_each_entry_safe(svc, nxt,
1300 &ip_vs_svc_fwm_table[idx], f_list) {
1301 write_lock_bh(&__ip_vs_svc_lock);
1302 ip_vs_svc_unhash(svc);
1303 /*
1304 * Wait until all the svc users go away.
1305 */
1306 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1307 __ip_vs_del_service(svc);
1308 write_unlock_bh(&__ip_vs_svc_lock);
1309 }
1310 }
1311
1312 return 0;
1313}
1314
1315
1316/*
1317 * Zero counters in a service or all services
1318 */
1319static int ip_vs_zero_service(struct ip_vs_service *svc)
1320{
1321 struct ip_vs_dest *dest;
1322
1323 write_lock_bh(&__ip_vs_svc_lock);
1324 list_for_each_entry(dest, &svc->destinations, n_list) {
1325 ip_vs_zero_stats(&dest->stats);
1326 }
1327 ip_vs_zero_stats(&svc->stats);
1328 write_unlock_bh(&__ip_vs_svc_lock);
1329 return 0;
1330}
1331
1332static int ip_vs_zero_all(void)
1333{
1334 int idx;
1335 struct ip_vs_service *svc;
1336
1337 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1338 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1339 ip_vs_zero_service(svc);
1340 }
1341 }
1342
1343 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1344 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1345 ip_vs_zero_service(svc);
1346 }
1347 }
1348
1349 ip_vs_zero_stats(&ip_vs_stats);
1350 return 0;
1351}
1352
1353
1354static int
1355proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
1356 void __user *buffer, size_t *lenp, loff_t *ppos)
1357{
1358 int *valp = table->data;
1359 int val = *valp;
1360 int rc;
1361
1362 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1363 if (write && (*valp != val)) {
1364 if ((*valp < 0) || (*valp > 3)) {
1365 /* Restore the correct value */
1366 *valp = val;
1367 } else {
1da177e4 1368 update_defense_level();
1da177e4
LT
1369 }
1370 }
1371 return rc;
1372}
1373
1374
1375static int
1376proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
1377 void __user *buffer, size_t *lenp, loff_t *ppos)
1378{
1379 int *valp = table->data;
1380 int val[2];
1381 int rc;
1382
1383 /* backup the value first */
1384 memcpy(val, valp, sizeof(val));
1385
1386 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1387 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1388 /* Restore the correct value */
1389 memcpy(valp, val, sizeof(val));
1390 }
1391 return rc;
1392}
1393
1394
1395/*
1396 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1397 */
1398
1399static struct ctl_table vs_vars[] = {
1400 {
1401 .ctl_name = NET_IPV4_VS_AMEMTHRESH,
1402 .procname = "amemthresh",
1403 .data = &sysctl_ip_vs_amemthresh,
1404 .maxlen = sizeof(int),
1405 .mode = 0644,
1406 .proc_handler = &proc_dointvec,
1407 },
1408#ifdef CONFIG_IP_VS_DEBUG
1409 {
1410 .ctl_name = NET_IPV4_VS_DEBUG_LEVEL,
1411 .procname = "debug_level",
1412 .data = &sysctl_ip_vs_debug_level,
1413 .maxlen = sizeof(int),
1414 .mode = 0644,
1415 .proc_handler = &proc_dointvec,
1416 },
1417#endif
1418 {
1419 .ctl_name = NET_IPV4_VS_AMDROPRATE,
1420 .procname = "am_droprate",
1421 .data = &sysctl_ip_vs_am_droprate,
1422 .maxlen = sizeof(int),
1423 .mode = 0644,
1424 .proc_handler = &proc_dointvec,
1425 },
1426 {
1427 .ctl_name = NET_IPV4_VS_DROP_ENTRY,
1428 .procname = "drop_entry",
1429 .data = &sysctl_ip_vs_drop_entry,
1430 .maxlen = sizeof(int),
1431 .mode = 0644,
1432 .proc_handler = &proc_do_defense_mode,
1433 },
1434 {
1435 .ctl_name = NET_IPV4_VS_DROP_PACKET,
1436 .procname = "drop_packet",
1437 .data = &sysctl_ip_vs_drop_packet,
1438 .maxlen = sizeof(int),
1439 .mode = 0644,
1440 .proc_handler = &proc_do_defense_mode,
1441 },
1442 {
1443 .ctl_name = NET_IPV4_VS_SECURE_TCP,
1444 .procname = "secure_tcp",
1445 .data = &sysctl_ip_vs_secure_tcp,
1446 .maxlen = sizeof(int),
1447 .mode = 0644,
1448 .proc_handler = &proc_do_defense_mode,
1449 },
1450#if 0
1451 {
1452 .ctl_name = NET_IPV4_VS_TO_ES,
1453 .procname = "timeout_established",
1454 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1455 .maxlen = sizeof(int),
1456 .mode = 0644,
1457 .proc_handler = &proc_dointvec_jiffies,
1458 },
1459 {
1460 .ctl_name = NET_IPV4_VS_TO_SS,
1461 .procname = "timeout_synsent",
1462 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1463 .maxlen = sizeof(int),
1464 .mode = 0644,
1465 .proc_handler = &proc_dointvec_jiffies,
1466 },
1467 {
1468 .ctl_name = NET_IPV4_VS_TO_SR,
1469 .procname = "timeout_synrecv",
1470 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1471 .maxlen = sizeof(int),
1472 .mode = 0644,
1473 .proc_handler = &proc_dointvec_jiffies,
1474 },
1475 {
1476 .ctl_name = NET_IPV4_VS_TO_FW,
1477 .procname = "timeout_finwait",
1478 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1479 .maxlen = sizeof(int),
1480 .mode = 0644,
1481 .proc_handler = &proc_dointvec_jiffies,
1482 },
1483 {
1484 .ctl_name = NET_IPV4_VS_TO_TW,
1485 .procname = "timeout_timewait",
1486 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1487 .maxlen = sizeof(int),
1488 .mode = 0644,
1489 .proc_handler = &proc_dointvec_jiffies,
1490 },
1491 {
1492 .ctl_name = NET_IPV4_VS_TO_CL,
1493 .procname = "timeout_close",
1494 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1495 .maxlen = sizeof(int),
1496 .mode = 0644,
1497 .proc_handler = &proc_dointvec_jiffies,
1498 },
1499 {
1500 .ctl_name = NET_IPV4_VS_TO_CW,
1501 .procname = "timeout_closewait",
1502 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1503 .maxlen = sizeof(int),
1504 .mode = 0644,
1505 .proc_handler = &proc_dointvec_jiffies,
1506 },
1507 {
1508 .ctl_name = NET_IPV4_VS_TO_LA,
1509 .procname = "timeout_lastack",
1510 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1511 .maxlen = sizeof(int),
1512 .mode = 0644,
1513 .proc_handler = &proc_dointvec_jiffies,
1514 },
1515 {
1516 .ctl_name = NET_IPV4_VS_TO_LI,
1517 .procname = "timeout_listen",
1518 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1519 .maxlen = sizeof(int),
1520 .mode = 0644,
1521 .proc_handler = &proc_dointvec_jiffies,
1522 },
1523 {
1524 .ctl_name = NET_IPV4_VS_TO_SA,
1525 .procname = "timeout_synack",
1526 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1527 .maxlen = sizeof(int),
1528 .mode = 0644,
1529 .proc_handler = &proc_dointvec_jiffies,
1530 },
1531 {
1532 .ctl_name = NET_IPV4_VS_TO_UDP,
1533 .procname = "timeout_udp",
1534 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1535 .maxlen = sizeof(int),
1536 .mode = 0644,
1537 .proc_handler = &proc_dointvec_jiffies,
1538 },
1539 {
1540 .ctl_name = NET_IPV4_VS_TO_ICMP,
1541 .procname = "timeout_icmp",
1542 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1543 .maxlen = sizeof(int),
1544 .mode = 0644,
1545 .proc_handler = &proc_dointvec_jiffies,
1546 },
1547#endif
1548 {
1549 .ctl_name = NET_IPV4_VS_CACHE_BYPASS,
1550 .procname = "cache_bypass",
1551 .data = &sysctl_ip_vs_cache_bypass,
1552 .maxlen = sizeof(int),
1553 .mode = 0644,
1554 .proc_handler = &proc_dointvec,
1555 },
1556 {
1557 .ctl_name = NET_IPV4_VS_EXPIRE_NODEST_CONN,
1558 .procname = "expire_nodest_conn",
1559 .data = &sysctl_ip_vs_expire_nodest_conn,
1560 .maxlen = sizeof(int),
1561 .mode = 0644,
1562 .proc_handler = &proc_dointvec,
1563 },
1564 {
1565 .ctl_name = NET_IPV4_VS_EXPIRE_QUIESCENT_TEMPLATE,
1566 .procname = "expire_quiescent_template",
1567 .data = &sysctl_ip_vs_expire_quiescent_template,
1568 .maxlen = sizeof(int),
1569 .mode = 0644,
1570 .proc_handler = &proc_dointvec,
1571 },
1572 {
1573 .ctl_name = NET_IPV4_VS_SYNC_THRESHOLD,
1574 .procname = "sync_threshold",
1575 .data = &sysctl_ip_vs_sync_threshold,
1576 .maxlen = sizeof(sysctl_ip_vs_sync_threshold),
1577 .mode = 0644,
1578 .proc_handler = &proc_do_sync_threshold,
1579 },
1580 {
1581 .ctl_name = NET_IPV4_VS_NAT_ICMP_SEND,
1582 .procname = "nat_icmp_send",
1583 .data = &sysctl_ip_vs_nat_icmp_send,
1584 .maxlen = sizeof(int),
1585 .mode = 0644,
1586 .proc_handler = &proc_dointvec,
1587 },
1588 { .ctl_name = 0 }
1589};
1590
1591static ctl_table vs_table[] = {
1592 {
1593 .ctl_name = NET_IPV4_VS,
1594 .procname = "vs",
1595 .mode = 0555,
1596 .child = vs_vars
1597 },
1598 { .ctl_name = 0 }
1599};
1600
bf0ff9e5 1601static ctl_table ipvs_ipv4_table[] = {
1da177e4
LT
1602 {
1603 .ctl_name = NET_IPV4,
1604 .procname = "ipv4",
1605 .mode = 0555,
1606 .child = vs_table,
1607 },
1608 { .ctl_name = 0 }
1609};
1610
1611static ctl_table vs_root_table[] = {
1612 {
1613 .ctl_name = CTL_NET,
1614 .procname = "net",
1615 .mode = 0555,
bf0ff9e5 1616 .child = ipvs_ipv4_table,
1da177e4
LT
1617 },
1618 { .ctl_name = 0 }
1619};
1620
/* Handle returned by register_sysctl_table(); needed to unregister. */
static struct ctl_table_header *sysctl_header;
1622
1623#ifdef CONFIG_PROC_FS
1624
/* Position of the /proc/net/ip_vs seq_file walk, kept in seq->private. */
struct ip_vs_iter {
	struct list_head *table;	/* ip_vs_svc_table or ip_vs_svc_fwm_table */
	int bucket;			/* index of the current bucket in that table */
};
1629
1630/*
1631 * Write the contents of the VS rule table to a PROCfs file.
1632 * (It is kept just for backward compatibility)
1633 */
1634static inline const char *ip_vs_fwd_name(unsigned flags)
1635{
1636 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1637 case IP_VS_CONN_F_LOCALNODE:
1638 return "Local";
1639 case IP_VS_CONN_F_TUNNEL:
1640 return "Tunnel";
1641 case IP_VS_CONN_F_DROUTE:
1642 return "Route";
1643 default:
1644 return "Masq";
1645 }
1646}
1647
1648
1649/* Get the Nth entry in the two lists */
1650static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1651{
1652 struct ip_vs_iter *iter = seq->private;
1653 int idx;
1654 struct ip_vs_service *svc;
1655
1656 /* look in hash by protocol */
1657 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1658 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1659 if (pos-- == 0){
1660 iter->table = ip_vs_svc_table;
1661 iter->bucket = idx;
1662 return svc;
1663 }
1664 }
1665 }
1666
1667 /* keep looking in fwmark */
1668 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1669 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1670 if (pos-- == 0) {
1671 iter->table = ip_vs_svc_fwm_table;
1672 iter->bucket = idx;
1673 return svc;
1674 }
1675 }
1676 }
1677
1678 return NULL;
1679}
1680
1681static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1682{
1683
1684 read_lock_bh(&__ip_vs_svc_lock);
1685 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1686}
1687
1688
1689static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1690{
1691 struct list_head *e;
1692 struct ip_vs_iter *iter;
1693 struct ip_vs_service *svc;
1694
1695 ++*pos;
1696 if (v == SEQ_START_TOKEN)
1697 return ip_vs_info_array(seq,0);
1698
1699 svc = v;
1700 iter = seq->private;
1701
1702 if (iter->table == ip_vs_svc_table) {
1703 /* next service in table hashed by protocol */
1704 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1705 return list_entry(e, struct ip_vs_service, s_list);
1706
1707
1708 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1709 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1710 s_list) {
1711 return svc;
1712 }
1713 }
1714
1715 iter->table = ip_vs_svc_fwm_table;
1716 iter->bucket = -1;
1717 goto scan_fwmark;
1718 }
1719
1720 /* next service in hashed by fwmark */
1721 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1722 return list_entry(e, struct ip_vs_service, f_list);
1723
1724 scan_fwmark:
1725 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1726 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1727 f_list)
1728 return svc;
1729 }
1730
1731 return NULL;
1732}
1733
1734static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1735{
1736 read_unlock_bh(&__ip_vs_svc_lock);
1737}
1738
1739
1740static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1741{
1742 if (v == SEQ_START_TOKEN) {
1743 seq_printf(seq,
1744 "IP Virtual Server version %d.%d.%d (size=%d)\n",
1745 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
1746 seq_puts(seq,
1747 "Prot LocalAddress:Port Scheduler Flags\n");
1748 seq_puts(seq,
1749 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1750 } else {
1751 const struct ip_vs_service *svc = v;
1752 const struct ip_vs_iter *iter = seq->private;
1753 const struct ip_vs_dest *dest;
1754
1755 if (iter->table == ip_vs_svc_table)
1756 seq_printf(seq, "%s %08X:%04X %s ",
1757 ip_vs_proto_name(svc->protocol),
1758 ntohl(svc->addr),
1759 ntohs(svc->port),
1760 svc->scheduler->name);
1761 else
1762 seq_printf(seq, "FWM %08X %s ",
1763 svc->fwmark, svc->scheduler->name);
1764
1765 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1766 seq_printf(seq, "persistent %d %08X\n",
1767 svc->timeout,
1768 ntohl(svc->netmask));
1769 else
1770 seq_putc(seq, '\n');
1771
1772 list_for_each_entry(dest, &svc->destinations, n_list) {
1773 seq_printf(seq,
1774 " -> %08X:%04X %-7s %-6d %-10d %-10d\n",
1775 ntohl(dest->addr), ntohs(dest->port),
1776 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1777 atomic_read(&dest->weight),
1778 atomic_read(&dest->activeconns),
1779 atomic_read(&dest->inactconns));
1780 }
1781 }
1782 return 0;
1783}
1784
56b3d975 1785static const struct seq_operations ip_vs_info_seq_ops = {
1da177e4
LT
1786 .start = ip_vs_info_seq_start,
1787 .next = ip_vs_info_seq_next,
1788 .stop = ip_vs_info_seq_stop,
1789 .show = ip_vs_info_seq_show,
1790};
1791
1792static int ip_vs_info_open(struct inode *inode, struct file *file)
1793{
1794 struct seq_file *seq;
1795 int rc = -ENOMEM;
0da974f4 1796 struct ip_vs_iter *s = kzalloc(sizeof(*s), GFP_KERNEL);
1da177e4
LT
1797
1798 if (!s)
1799 goto out;
1800
1801 rc = seq_open(file, &ip_vs_info_seq_ops);
1802 if (rc)
1803 goto out_kfree;
1804
1805 seq = file->private_data;
1806 seq->private = s;
1da177e4
LT
1807out:
1808 return rc;
1809out_kfree:
1810 kfree(s);
1811 goto out;
1812}
1813
9a32144e 1814static const struct file_operations ip_vs_info_fops = {
1da177e4
LT
1815 .owner = THIS_MODULE,
1816 .open = ip_vs_info_open,
1817 .read = seq_read,
1818 .llseek = seq_lseek,
1819 .release = seq_release_private,
1820};
1821
1822#endif
1823
1824struct ip_vs_stats ip_vs_stats;
1825
1826#ifdef CONFIG_PROC_FS
1827static int ip_vs_stats_show(struct seq_file *seq, void *v)
1828{
1829
1830/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1831 seq_puts(seq,
1832 " Total Incoming Outgoing Incoming Outgoing\n");
1833 seq_printf(seq,
1834 " Conns Packets Packets Bytes Bytes\n");
1835
1836 spin_lock_bh(&ip_vs_stats.lock);
1837 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.conns,
1838 ip_vs_stats.inpkts, ip_vs_stats.outpkts,
1839 (unsigned long long) ip_vs_stats.inbytes,
1840 (unsigned long long) ip_vs_stats.outbytes);
1841
1842/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1843 seq_puts(seq,
1844 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1845 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
1846 ip_vs_stats.cps,
1847 ip_vs_stats.inpps,
1848 ip_vs_stats.outpps,
1849 ip_vs_stats.inbps,
1850 ip_vs_stats.outbps);
1851 spin_unlock_bh(&ip_vs_stats.lock);
1852
1853 return 0;
1854}
1855
1856static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1857{
1858 return single_open(file, ip_vs_stats_show, NULL);
1859}
1860
9a32144e 1861static const struct file_operations ip_vs_stats_fops = {
1da177e4
LT
1862 .owner = THIS_MODULE,
1863 .open = ip_vs_stats_seq_open,
1864 .read = seq_read,
1865 .llseek = seq_lseek,
1866 .release = single_release,
1867};
1868
1869#endif
1870
1871/*
1872 * Set timeout values for tcp tcpfin udp in the timeout_table.
1873 */
1874static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
1875{
1876 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
1877 u->tcp_timeout,
1878 u->tcp_fin_timeout,
1879 u->udp_timeout);
1880
1881#ifdef CONFIG_IP_VS_PROTO_TCP
1882 if (u->tcp_timeout) {
1883 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
1884 = u->tcp_timeout * HZ;
1885 }
1886
1887 if (u->tcp_fin_timeout) {
1888 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
1889 = u->tcp_fin_timeout * HZ;
1890 }
1891#endif
1892
1893#ifdef CONFIG_IP_VS_PROTO_UDP
1894 if (u->udp_timeout) {
1895 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
1896 = u->udp_timeout * HZ;
1897 }
1898#endif
1899 return 0;
1900}
1901
1902
/*
 * Helpers for the setsockopt path: SET_CMDID turns a command number
 * into an index into set_arglen[], the *_ARG_LEN macros give the exact
 * argument size of each command family, and MAX_ARG_LEN sizes the
 * on-stack argument buffer.  The SET_CMDID argument is parenthesized so
 * that an expression can be passed safely.
 */
#define SET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
#define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
				 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
#define MAX_ARG_LEN		SVCDEST_ARG_LEN
1910
9b5b5cff 1911static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
1da177e4
LT
1912 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
1913 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
1914 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
1915 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
1916 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
1917 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
1918 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
1919 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
1920 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
1921 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
1922 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
1923};
1924
1925static int
1926do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1927{
1928 int ret;
1929 unsigned char arg[MAX_ARG_LEN];
1930 struct ip_vs_service_user *usvc;
1931 struct ip_vs_service *svc;
1932 struct ip_vs_dest_user *udest;
1933
1934 if (!capable(CAP_NET_ADMIN))
1935 return -EPERM;
1936
1937 if (len != set_arglen[SET_CMDID(cmd)]) {
1938 IP_VS_ERR("set_ctl: len %u != %u\n",
1939 len, set_arglen[SET_CMDID(cmd)]);
1940 return -EINVAL;
1941 }
1942
1943 if (copy_from_user(arg, user, len) != 0)
1944 return -EFAULT;
1945
1946 /* increase the module use count */
1947 ip_vs_use_count_inc();
1948
14cc3e2b 1949 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
1da177e4
LT
1950 ret = -ERESTARTSYS;
1951 goto out_dec;
1952 }
1953
1954 if (cmd == IP_VS_SO_SET_FLUSH) {
1955 /* Flush the virtual service */
1956 ret = ip_vs_flush();
1957 goto out_unlock;
1958 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
1959 /* Set timeout values for (tcp tcpfin udp) */
1960 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
1961 goto out_unlock;
1962 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
1963 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
1964 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
1965 goto out_unlock;
1966 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
1967 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
1968 ret = stop_sync_thread(dm->state);
1969 goto out_unlock;
1970 }
1971
1972 usvc = (struct ip_vs_service_user *)arg;
1973 udest = (struct ip_vs_dest_user *)(usvc + 1);
1974
1975 if (cmd == IP_VS_SO_SET_ZERO) {
1976 /* if no service address is set, zero counters in all */
1977 if (!usvc->fwmark && !usvc->addr && !usvc->port) {
1978 ret = ip_vs_zero_all();
1979 goto out_unlock;
1980 }
1981 }
1982
1983 /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
1984 if (usvc->protocol!=IPPROTO_TCP && usvc->protocol!=IPPROTO_UDP) {
1985 IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
1986 usvc->protocol, NIPQUAD(usvc->addr),
1987 ntohs(usvc->port), usvc->sched_name);
1988 ret = -EFAULT;
1989 goto out_unlock;
1990 }
1991
1992 /* Lookup the exact service by <protocol, addr, port> or fwmark */
1993 if (usvc->fwmark == 0)
1994 svc = __ip_vs_service_get(usvc->protocol,
1995 usvc->addr, usvc->port);
1996 else
1997 svc = __ip_vs_svc_fwm_get(usvc->fwmark);
1998
1999 if (cmd != IP_VS_SO_SET_ADD
2000 && (svc == NULL || svc->protocol != usvc->protocol)) {
2001 ret = -ESRCH;
2002 goto out_unlock;
2003 }
2004
2005 switch (cmd) {
2006 case IP_VS_SO_SET_ADD:
2007 if (svc != NULL)
2008 ret = -EEXIST;
2009 else
2010 ret = ip_vs_add_service(usvc, &svc);
2011 break;
2012 case IP_VS_SO_SET_EDIT:
2013 ret = ip_vs_edit_service(svc, usvc);
2014 break;
2015 case IP_VS_SO_SET_DEL:
2016 ret = ip_vs_del_service(svc);
2017 if (!ret)
2018 goto out_unlock;
2019 break;
2020 case IP_VS_SO_SET_ZERO:
2021 ret = ip_vs_zero_service(svc);
2022 break;
2023 case IP_VS_SO_SET_ADDDEST:
2024 ret = ip_vs_add_dest(svc, udest);
2025 break;
2026 case IP_VS_SO_SET_EDITDEST:
2027 ret = ip_vs_edit_dest(svc, udest);
2028 break;
2029 case IP_VS_SO_SET_DELDEST:
2030 ret = ip_vs_del_dest(svc, udest);
2031 break;
2032 default:
2033 ret = -EINVAL;
2034 }
2035
2036 if (svc)
2037 ip_vs_service_put(svc);
2038
2039 out_unlock:
14cc3e2b 2040 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2041 out_dec:
2042 /* decrease the module use count */
2043 ip_vs_use_count_dec();
2044
2045 return ret;
2046}
2047
2048
2049static void
2050ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2051{
2052 spin_lock_bh(&src->lock);
2053 memcpy(dst, src, (char*)&src->lock - (char*)src);
2054 spin_unlock_bh(&src->lock);
2055}
2056
2057static void
2058ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2059{
2060 dst->protocol = src->protocol;
2061 dst->addr = src->addr;
2062 dst->port = src->port;
2063 dst->fwmark = src->fwmark;
4da62fc7 2064 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
1da177e4
LT
2065 dst->flags = src->flags;
2066 dst->timeout = src->timeout / HZ;
2067 dst->netmask = src->netmask;
2068 dst->num_dests = src->num_dests;
2069 ip_vs_copy_stats(&dst->stats, &src->stats);
2070}
2071
2072static inline int
2073__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2074 struct ip_vs_get_services __user *uptr)
2075{
2076 int idx, count=0;
2077 struct ip_vs_service *svc;
2078 struct ip_vs_service_entry entry;
2079 int ret = 0;
2080
2081 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2082 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2083 if (count >= get->num_services)
2084 goto out;
4da62fc7 2085 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2086 ip_vs_copy_service(&entry, svc);
2087 if (copy_to_user(&uptr->entrytable[count],
2088 &entry, sizeof(entry))) {
2089 ret = -EFAULT;
2090 goto out;
2091 }
2092 count++;
2093 }
2094 }
2095
2096 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2097 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2098 if (count >= get->num_services)
2099 goto out;
4da62fc7 2100 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2101 ip_vs_copy_service(&entry, svc);
2102 if (copy_to_user(&uptr->entrytable[count],
2103 &entry, sizeof(entry))) {
2104 ret = -EFAULT;
2105 goto out;
2106 }
2107 count++;
2108 }
2109 }
2110 out:
2111 return ret;
2112}
2113
2114static inline int
2115__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2116 struct ip_vs_get_dests __user *uptr)
2117{
2118 struct ip_vs_service *svc;
2119 int ret = 0;
2120
2121 if (get->fwmark)
2122 svc = __ip_vs_svc_fwm_get(get->fwmark);
2123 else
2124 svc = __ip_vs_service_get(get->protocol,
2125 get->addr, get->port);
2126 if (svc) {
2127 int count = 0;
2128 struct ip_vs_dest *dest;
2129 struct ip_vs_dest_entry entry;
2130
2131 list_for_each_entry(dest, &svc->destinations, n_list) {
2132 if (count >= get->num_dests)
2133 break;
2134
2135 entry.addr = dest->addr;
2136 entry.port = dest->port;
2137 entry.conn_flags = atomic_read(&dest->conn_flags);
2138 entry.weight = atomic_read(&dest->weight);
2139 entry.u_threshold = dest->u_threshold;
2140 entry.l_threshold = dest->l_threshold;
2141 entry.activeconns = atomic_read(&dest->activeconns);
2142 entry.inactconns = atomic_read(&dest->inactconns);
2143 entry.persistconns = atomic_read(&dest->persistconns);
2144 ip_vs_copy_stats(&entry.stats, &dest->stats);
2145 if (copy_to_user(&uptr->entrytable[count],
2146 &entry, sizeof(entry))) {
2147 ret = -EFAULT;
2148 break;
2149 }
2150 count++;
2151 }
2152 ip_vs_service_put(svc);
2153 } else
2154 ret = -ESRCH;
2155 return ret;
2156}
2157
2158static inline void
2159__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2160{
2161#ifdef CONFIG_IP_VS_PROTO_TCP
2162 u->tcp_timeout =
2163 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2164 u->tcp_fin_timeout =
2165 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2166#endif
2167#ifdef CONFIG_IP_VS_PROTO_UDP
2168 u->udp_timeout =
2169 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2170#endif
2171}
2172
2173
/*
 * Helpers for the getsockopt path: GET_CMDID turns a command number
 * into an index into get_arglen[], the GET_*_ARG_LEN macros give the
 * minimum argument size of each command.  The GET_CMDID argument is
 * parenthesized so that an expression can be passed safely.
 */
#define GET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
#define GET_INFO_ARG_LEN	(sizeof(struct ip_vs_getinfo))
#define GET_SERVICES_ARG_LEN	(sizeof(struct ip_vs_get_services))
#define GET_SERVICE_ARG_LEN	(sizeof(struct ip_vs_service_entry))
#define GET_DESTS_ARG_LEN	(sizeof(struct ip_vs_get_dests))
#define GET_TIMEOUT_ARG_LEN	(sizeof(struct ip_vs_timeout_user))
#define GET_DAEMON_ARG_LEN	(sizeof(struct ip_vs_daemon_user) * 2)
2181
9b5b5cff 2182static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
1da177e4
LT
2183 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2184 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2185 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2186 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2187 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2188 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2189 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2190};
2191
2192static int
2193do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2194{
2195 unsigned char arg[128];
2196 int ret = 0;
2197
2198 if (!capable(CAP_NET_ADMIN))
2199 return -EPERM;
2200
2201 if (*len < get_arglen[GET_CMDID(cmd)]) {
2202 IP_VS_ERR("get_ctl: len %u < %u\n",
2203 *len, get_arglen[GET_CMDID(cmd)]);
2204 return -EINVAL;
2205 }
2206
2207 if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0)
2208 return -EFAULT;
2209
14cc3e2b 2210 if (mutex_lock_interruptible(&__ip_vs_mutex))
1da177e4
LT
2211 return -ERESTARTSYS;
2212
2213 switch (cmd) {
2214 case IP_VS_SO_GET_VERSION:
2215 {
2216 char buf[64];
2217
2218 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2219 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
2220 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2221 ret = -EFAULT;
2222 goto out;
2223 }
2224 *len = strlen(buf)+1;
2225 }
2226 break;
2227
2228 case IP_VS_SO_GET_INFO:
2229 {
2230 struct ip_vs_getinfo info;
2231 info.version = IP_VS_VERSION_CODE;
2232 info.size = IP_VS_CONN_TAB_SIZE;
2233 info.num_services = ip_vs_num_services;
2234 if (copy_to_user(user, &info, sizeof(info)) != 0)
2235 ret = -EFAULT;
2236 }
2237 break;
2238
2239 case IP_VS_SO_GET_SERVICES:
2240 {
2241 struct ip_vs_get_services *get;
2242 int size;
2243
2244 get = (struct ip_vs_get_services *)arg;
2245 size = sizeof(*get) +
2246 sizeof(struct ip_vs_service_entry) * get->num_services;
2247 if (*len != size) {
2248 IP_VS_ERR("length: %u != %u\n", *len, size);
2249 ret = -EINVAL;
2250 goto out;
2251 }
2252 ret = __ip_vs_get_service_entries(get, user);
2253 }
2254 break;
2255
2256 case IP_VS_SO_GET_SERVICE:
2257 {
2258 struct ip_vs_service_entry *entry;
2259 struct ip_vs_service *svc;
2260
2261 entry = (struct ip_vs_service_entry *)arg;
2262 if (entry->fwmark)
2263 svc = __ip_vs_svc_fwm_get(entry->fwmark);
2264 else
2265 svc = __ip_vs_service_get(entry->protocol,
2266 entry->addr, entry->port);
2267 if (svc) {
2268 ip_vs_copy_service(entry, svc);
2269 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2270 ret = -EFAULT;
2271 ip_vs_service_put(svc);
2272 } else
2273 ret = -ESRCH;
2274 }
2275 break;
2276
2277 case IP_VS_SO_GET_DESTS:
2278 {
2279 struct ip_vs_get_dests *get;
2280 int size;
2281
2282 get = (struct ip_vs_get_dests *)arg;
2283 size = sizeof(*get) +
2284 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2285 if (*len != size) {
2286 IP_VS_ERR("length: %u != %u\n", *len, size);
2287 ret = -EINVAL;
2288 goto out;
2289 }
2290 ret = __ip_vs_get_dest_entries(get, user);
2291 }
2292 break;
2293
2294 case IP_VS_SO_GET_TIMEOUT:
2295 {
2296 struct ip_vs_timeout_user t;
2297
2298 __ip_vs_get_timeouts(&t);
2299 if (copy_to_user(user, &t, sizeof(t)) != 0)
2300 ret = -EFAULT;
2301 }
2302 break;
2303
2304 case IP_VS_SO_GET_DAEMON:
2305 {
2306 struct ip_vs_daemon_user d[2];
2307
2308 memset(&d, 0, sizeof(d));
2309 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2310 d[0].state = IP_VS_STATE_MASTER;
4da62fc7 2311 strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
1da177e4
LT
2312 d[0].syncid = ip_vs_master_syncid;
2313 }
2314 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2315 d[1].state = IP_VS_STATE_BACKUP;
4da62fc7 2316 strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
1da177e4
LT
2317 d[1].syncid = ip_vs_backup_syncid;
2318 }
2319 if (copy_to_user(user, &d, sizeof(d)) != 0)
2320 ret = -EFAULT;
2321 }
2322 break;
2323
2324 default:
2325 ret = -EINVAL;
2326 }
2327
2328 out:
14cc3e2b 2329 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2330 return ret;
2331}
2332
2333
2334static struct nf_sockopt_ops ip_vs_sockopts = {
2335 .pf = PF_INET,
2336 .set_optmin = IP_VS_BASE_CTL,
2337 .set_optmax = IP_VS_SO_SET_MAX+1,
2338 .set = do_ip_vs_set_ctl,
2339 .get_optmin = IP_VS_BASE_CTL,
2340 .get_optmax = IP_VS_SO_GET_MAX+1,
2341 .get = do_ip_vs_get_ctl,
16fcec35 2342 .owner = THIS_MODULE,
1da177e4
LT
2343};
2344
2345
2346int ip_vs_control_init(void)
2347{
2348 int ret;
2349 int idx;
2350
2351 EnterFunction(2);
2352
2353 ret = nf_register_sockopt(&ip_vs_sockopts);
2354 if (ret) {
2355 IP_VS_ERR("cannot register sockopt.\n");
2356 return ret;
2357 }
2358
2359 proc_net_fops_create("ip_vs", 0, &ip_vs_info_fops);
2360 proc_net_fops_create("ip_vs_stats",0, &ip_vs_stats_fops);
2361
0b4d4147 2362 sysctl_header = register_sysctl_table(vs_root_table);
1da177e4
LT
2363
2364 /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
2365 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2366 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
2367 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
2368 }
2369 for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) {
2370 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
2371 }
2372
2373 memset(&ip_vs_stats, 0, sizeof(ip_vs_stats));
2374 spin_lock_init(&ip_vs_stats.lock);
2375 ip_vs_new_estimator(&ip_vs_stats);
2376
2377 /* Hook the defense timer */
2378 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
2379
2380 LeaveFunction(2);
2381 return 0;
2382}
2383
2384
2385void ip_vs_control_cleanup(void)
2386{
2387 EnterFunction(2);
2388 ip_vs_trash_cleanup();
2389 cancel_rearming_delayed_work(&defense_work);
28e53bdd 2390 cancel_work_sync(&defense_work.work);
1da177e4
LT
2391 ip_vs_kill_estimator(&ip_vs_stats);
2392 unregister_sysctl_table(sysctl_header);
2393 proc_net_remove("ip_vs_stats");
2394 proc_net_remove("ip_vs");
2395 nf_unregister_sockopt(&ip_vs_sockopts);
2396 LeaveFunction(2);
2397}