]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/ipvs/ip_vs_ctl.c
[PATCH] capable/capability.h (fs/)
[net-next-2.6.git] / net / ipv4 / ipvs / ip_vs_ctl.c
CommitLineData
1da177e4
LT
/*
 * IPVS         An implementation of the IP virtual server support for the
 *              LINUX operating system.  IPVS is now implemented as a module
 *              over the NetFilter framework. IPVS can be used to build a
 *              high-performance and highly available server based on a
 *              cluster of servers.
 *
 * Version:     $Id: ip_vs_ctl.c,v 1.36 2003/06/08 09:31:19 wensong Exp $
 *
 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
 *              Peter Kese <peter.kese@ijs.si>
 *              Julian Anastasov <ja@ssi.bg>
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Changes:
 *
 */
22
23#include <linux/module.h>
24#include <linux/init.h>
25#include <linux/types.h>
26#include <linux/fs.h>
27#include <linux/sysctl.h>
28#include <linux/proc_fs.h>
29#include <linux/workqueue.h>
30#include <linux/swap.h>
31#include <linux/proc_fs.h>
32#include <linux/seq_file.h>
33
34#include <linux/netfilter.h>
35#include <linux/netfilter_ipv4.h>
36
37#include <net/ip.h>
14c85021 38#include <net/route.h>
1da177e4
LT
39#include <net/sock.h>
40
41#include <asm/uaccess.h>
42
43#include <net/ip_vs.h>
44
45/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
46static DECLARE_MUTEX(__ip_vs_mutex);
47
48/* lock for service table */
49static DEFINE_RWLOCK(__ip_vs_svc_lock);
50
51/* lock for table with the real services */
52static DEFINE_RWLOCK(__ip_vs_rs_lock);
53
54/* lock for state and timeout tables */
55static DEFINE_RWLOCK(__ip_vs_securetcp_lock);
56
57/* lock for drop entry handling */
58static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
59
60/* lock for drop packet handling */
61static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
62
63/* 1/rate drop and drop-entry variables */
64int ip_vs_drop_rate = 0;
65int ip_vs_drop_counter = 0;
66static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
67
68/* number of virtual services */
69static int ip_vs_num_services = 0;
70
71/* sysctl variables */
72static int sysctl_ip_vs_drop_entry = 0;
73static int sysctl_ip_vs_drop_packet = 0;
74static int sysctl_ip_vs_secure_tcp = 0;
75static int sysctl_ip_vs_amemthresh = 1024;
76static int sysctl_ip_vs_am_droprate = 10;
77int sysctl_ip_vs_cache_bypass = 0;
78int sysctl_ip_vs_expire_nodest_conn = 0;
79int sysctl_ip_vs_expire_quiescent_template = 0;
80int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
81int sysctl_ip_vs_nat_icmp_send = 0;
82
83
#ifdef CONFIG_IP_VS_DEBUG
static int sysctl_ip_vs_debug_level = 0;

/* Return the current IPVS debug verbosity (sysctl-controlled). */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
#endif
92
93/*
af9debd4
JA
94 * update_defense_level is called from keventd and from sysctl,
95 * so it needs to protect itself from softirqs
1da177e4
LT
96 */
97static void update_defense_level(void)
98{
99 struct sysinfo i;
100 static int old_secure_tcp = 0;
101 int availmem;
102 int nomem;
103 int to_change = -1;
104
105 /* we only count free and buffered memory (in pages) */
106 si_meminfo(&i);
107 availmem = i.freeram + i.bufferram;
108 /* however in linux 2.5 the i.bufferram is total page cache size,
109 we need adjust it */
110 /* si_swapinfo(&i); */
111 /* availmem = availmem - (i.totalswap - i.freeswap); */
112
113 nomem = (availmem < sysctl_ip_vs_amemthresh);
114
af9debd4
JA
115 local_bh_disable();
116
1da177e4
LT
117 /* drop_entry */
118 spin_lock(&__ip_vs_dropentry_lock);
119 switch (sysctl_ip_vs_drop_entry) {
120 case 0:
121 atomic_set(&ip_vs_dropentry, 0);
122 break;
123 case 1:
124 if (nomem) {
125 atomic_set(&ip_vs_dropentry, 1);
126 sysctl_ip_vs_drop_entry = 2;
127 } else {
128 atomic_set(&ip_vs_dropentry, 0);
129 }
130 break;
131 case 2:
132 if (nomem) {
133 atomic_set(&ip_vs_dropentry, 1);
134 } else {
135 atomic_set(&ip_vs_dropentry, 0);
136 sysctl_ip_vs_drop_entry = 1;
137 };
138 break;
139 case 3:
140 atomic_set(&ip_vs_dropentry, 1);
141 break;
142 }
143 spin_unlock(&__ip_vs_dropentry_lock);
144
145 /* drop_packet */
146 spin_lock(&__ip_vs_droppacket_lock);
147 switch (sysctl_ip_vs_drop_packet) {
148 case 0:
149 ip_vs_drop_rate = 0;
150 break;
151 case 1:
152 if (nomem) {
153 ip_vs_drop_rate = ip_vs_drop_counter
154 = sysctl_ip_vs_amemthresh /
155 (sysctl_ip_vs_amemthresh-availmem);
156 sysctl_ip_vs_drop_packet = 2;
157 } else {
158 ip_vs_drop_rate = 0;
159 }
160 break;
161 case 2:
162 if (nomem) {
163 ip_vs_drop_rate = ip_vs_drop_counter
164 = sysctl_ip_vs_amemthresh /
165 (sysctl_ip_vs_amemthresh-availmem);
166 } else {
167 ip_vs_drop_rate = 0;
168 sysctl_ip_vs_drop_packet = 1;
169 }
170 break;
171 case 3:
172 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
173 break;
174 }
175 spin_unlock(&__ip_vs_droppacket_lock);
176
177 /* secure_tcp */
178 write_lock(&__ip_vs_securetcp_lock);
179 switch (sysctl_ip_vs_secure_tcp) {
180 case 0:
181 if (old_secure_tcp >= 2)
182 to_change = 0;
183 break;
184 case 1:
185 if (nomem) {
186 if (old_secure_tcp < 2)
187 to_change = 1;
188 sysctl_ip_vs_secure_tcp = 2;
189 } else {
190 if (old_secure_tcp >= 2)
191 to_change = 0;
192 }
193 break;
194 case 2:
195 if (nomem) {
196 if (old_secure_tcp < 2)
197 to_change = 1;
198 } else {
199 if (old_secure_tcp >= 2)
200 to_change = 0;
201 sysctl_ip_vs_secure_tcp = 1;
202 }
203 break;
204 case 3:
205 if (old_secure_tcp < 2)
206 to_change = 1;
207 break;
208 }
209 old_secure_tcp = sysctl_ip_vs_secure_tcp;
210 if (to_change >= 0)
211 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
212 write_unlock(&__ip_vs_securetcp_lock);
af9debd4
JA
213
214 local_bh_enable();
1da177e4
LT
215}
216
217
218/*
219 * Timer for checking the defense
220 */
221#define DEFENSE_TIMER_PERIOD 1*HZ
222static void defense_work_handler(void *data);
223static DECLARE_WORK(defense_work, defense_work_handler, NULL);
224
225static void defense_work_handler(void *data)
226{
227 update_defense_level();
228 if (atomic_read(&ip_vs_dropentry))
229 ip_vs_random_dropentry();
230
231 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
232}
233
234int
235ip_vs_use_count_inc(void)
236{
237 return try_module_get(THIS_MODULE);
238}
239
240void
241ip_vs_use_count_dec(void)
242{
243 module_put(THIS_MODULE);
244}
245
246
247/*
248 * Hash table: for virtual service lookups
249 */
250#define IP_VS_SVC_TAB_BITS 8
251#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
252#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
253
254/* the service table hashed by <protocol, addr, port> */
255static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
256/* the service table hashed by fwmark */
257static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
258
259/*
260 * Hash table: for real service lookups
261 */
262#define IP_VS_RTAB_BITS 4
263#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
264#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
265
266static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
267
268/*
269 * Trash for destinations
270 */
271static LIST_HEAD(ip_vs_dest_trash);
272
273/*
274 * FTP & NULL virtual service counters
275 */
276static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
277static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
278
279
280/*
281 * Returns hash value for virtual service
282 */
283static __inline__ unsigned
284ip_vs_svc_hashkey(unsigned proto, __u32 addr, __u16 port)
285{
286 register unsigned porth = ntohs(port);
287
288 return (proto^ntohl(addr)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
289 & IP_VS_SVC_TAB_MASK;
290}
291
292/*
293 * Returns hash value of fwmark for virtual service lookup
294 */
295static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
296{
297 return fwmark & IP_VS_SVC_TAB_MASK;
298}
299
300/*
301 * Hashes a service in the ip_vs_svc_table by <proto,addr,port>
302 * or in the ip_vs_svc_fwm_table by fwmark.
303 * Should be called with locked tables.
304 */
305static int ip_vs_svc_hash(struct ip_vs_service *svc)
306{
307 unsigned hash;
308
309 if (svc->flags & IP_VS_SVC_F_HASHED) {
310 IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
311 "called from %p\n", __builtin_return_address(0));
312 return 0;
313 }
314
315 if (svc->fwmark == 0) {
316 /*
317 * Hash it by <protocol,addr,port> in ip_vs_svc_table
318 */
319 hash = ip_vs_svc_hashkey(svc->protocol, svc->addr, svc->port);
320 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
321 } else {
322 /*
323 * Hash it by fwmark in ip_vs_svc_fwm_table
324 */
325 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
326 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
327 }
328
329 svc->flags |= IP_VS_SVC_F_HASHED;
330 /* increase its refcnt because it is referenced by the svc table */
331 atomic_inc(&svc->refcnt);
332 return 1;
333}
334
335
336/*
337 * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
338 * Should be called with locked tables.
339 */
340static int ip_vs_svc_unhash(struct ip_vs_service *svc)
341{
342 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
343 IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
344 "called from %p\n", __builtin_return_address(0));
345 return 0;
346 }
347
348 if (svc->fwmark == 0) {
349 /* Remove it from the ip_vs_svc_table table */
350 list_del(&svc->s_list);
351 } else {
352 /* Remove it from the ip_vs_svc_fwm_table table */
353 list_del(&svc->f_list);
354 }
355
356 svc->flags &= ~IP_VS_SVC_F_HASHED;
357 atomic_dec(&svc->refcnt);
358 return 1;
359}
360
361
362/*
363 * Get service by {proto,addr,port} in the service table.
364 */
365static __inline__ struct ip_vs_service *
366__ip_vs_service_get(__u16 protocol, __u32 vaddr, __u16 vport)
367{
368 unsigned hash;
369 struct ip_vs_service *svc;
370
371 /* Check for "full" addressed entries */
372 hash = ip_vs_svc_hashkey(protocol, vaddr, vport);
373
374 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
375 if ((svc->addr == vaddr)
376 && (svc->port == vport)
377 && (svc->protocol == protocol)) {
378 /* HIT */
379 atomic_inc(&svc->usecnt);
380 return svc;
381 }
382 }
383
384 return NULL;
385}
386
387
388/*
389 * Get service by {fwmark} in the service table.
390 */
391static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark)
392{
393 unsigned hash;
394 struct ip_vs_service *svc;
395
396 /* Check for fwmark addressed entries */
397 hash = ip_vs_svc_fwm_hashkey(fwmark);
398
399 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
400 if (svc->fwmark == fwmark) {
401 /* HIT */
402 atomic_inc(&svc->usecnt);
403 return svc;
404 }
405 }
406
407 return NULL;
408}
409
410struct ip_vs_service *
411ip_vs_service_get(__u32 fwmark, __u16 protocol, __u32 vaddr, __u16 vport)
412{
413 struct ip_vs_service *svc;
414
415 read_lock(&__ip_vs_svc_lock);
416
417 /*
418 * Check the table hashed by fwmark first
419 */
420 if (fwmark && (svc = __ip_vs_svc_fwm_get(fwmark)))
421 goto out;
422
423 /*
424 * Check the table hashed by <protocol,addr,port>
425 * for "full" addressed entries
426 */
427 svc = __ip_vs_service_get(protocol, vaddr, vport);
428
429 if (svc == NULL
430 && protocol == IPPROTO_TCP
431 && atomic_read(&ip_vs_ftpsvc_counter)
432 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
433 /*
434 * Check if ftp service entry exists, the packet
435 * might belong to FTP data connections.
436 */
437 svc = __ip_vs_service_get(protocol, vaddr, FTPPORT);
438 }
439
440 if (svc == NULL
441 && atomic_read(&ip_vs_nullsvc_counter)) {
442 /*
443 * Check if the catch-all port (port zero) exists
444 */
445 svc = __ip_vs_service_get(protocol, vaddr, 0);
446 }
447
448 out:
449 read_unlock(&__ip_vs_svc_lock);
450
4b5bdf5c 451 IP_VS_DBG(9, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n",
1da177e4
LT
452 fwmark, ip_vs_proto_name(protocol),
453 NIPQUAD(vaddr), ntohs(vport),
454 svc?"hit":"not hit");
455
456 return svc;
457}
458
459
460static inline void
461__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
462{
463 atomic_inc(&svc->refcnt);
464 dest->svc = svc;
465}
466
467static inline void
468__ip_vs_unbind_svc(struct ip_vs_dest *dest)
469{
470 struct ip_vs_service *svc = dest->svc;
471
472 dest->svc = NULL;
473 if (atomic_dec_and_test(&svc->refcnt))
474 kfree(svc);
475}
476
477
478/*
479 * Returns hash value for real service
480 */
481static __inline__ unsigned ip_vs_rs_hashkey(__u32 addr, __u16 port)
482{
483 register unsigned porth = ntohs(port);
484
485 return (ntohl(addr)^(porth>>IP_VS_RTAB_BITS)^porth)
486 & IP_VS_RTAB_MASK;
487}
488
489/*
490 * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
491 * should be called with locked tables.
492 */
493static int ip_vs_rs_hash(struct ip_vs_dest *dest)
494{
495 unsigned hash;
496
497 if (!list_empty(&dest->d_list)) {
498 return 0;
499 }
500
501 /*
502 * Hash by proto,addr,port,
503 * which are the parameters of the real service.
504 */
505 hash = ip_vs_rs_hashkey(dest->addr, dest->port);
506 list_add(&dest->d_list, &ip_vs_rtable[hash]);
507
508 return 1;
509}
510
511/*
512 * UNhashes ip_vs_dest from ip_vs_rtable.
513 * should be called with locked tables.
514 */
515static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
516{
517 /*
518 * Remove it from the ip_vs_rtable table.
519 */
520 if (!list_empty(&dest->d_list)) {
521 list_del(&dest->d_list);
522 INIT_LIST_HEAD(&dest->d_list);
523 }
524
525 return 1;
526}
527
528/*
529 * Lookup real service by <proto,addr,port> in the real service table.
530 */
531struct ip_vs_dest *
532ip_vs_lookup_real_service(__u16 protocol, __u32 daddr, __u16 dport)
533{
534 unsigned hash;
535 struct ip_vs_dest *dest;
536
537 /*
538 * Check for "full" addressed entries
539 * Return the first found entry
540 */
541 hash = ip_vs_rs_hashkey(daddr, dport);
542
543 read_lock(&__ip_vs_rs_lock);
544 list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
545 if ((dest->addr == daddr)
546 && (dest->port == dport)
547 && ((dest->protocol == protocol) ||
548 dest->vfwmark)) {
549 /* HIT */
550 read_unlock(&__ip_vs_rs_lock);
551 return dest;
552 }
553 }
554 read_unlock(&__ip_vs_rs_lock);
555
556 return NULL;
557}
558
559/*
560 * Lookup destination by {addr,port} in the given service
561 */
562static struct ip_vs_dest *
563ip_vs_lookup_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport)
564{
565 struct ip_vs_dest *dest;
566
567 /*
568 * Find the destination for the given service
569 */
570 list_for_each_entry(dest, &svc->destinations, n_list) {
571 if ((dest->addr == daddr) && (dest->port == dport)) {
572 /* HIT */
573 return dest;
574 }
575 }
576
577 return NULL;
578}
579
580
581/*
582 * Lookup dest by {svc,addr,port} in the destination trash.
583 * The destination trash is used to hold the destinations that are removed
584 * from the service table but are still referenced by some conn entries.
585 * The reason to add the destination trash is when the dest is temporary
586 * down (either by administrator or by monitor program), the dest can be
587 * picked back from the trash, the remaining connections to the dest can
588 * continue, and the counting information of the dest is also useful for
589 * scheduling.
590 */
591static struct ip_vs_dest *
592ip_vs_trash_get_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport)
593{
594 struct ip_vs_dest *dest, *nxt;
595
596 /*
597 * Find the destination in trash
598 */
599 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
600 IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, "
4b5bdf5c 601 "dest->refcnt=%d\n",
1da177e4
LT
602 dest->vfwmark,
603 NIPQUAD(dest->addr), ntohs(dest->port),
604 atomic_read(&dest->refcnt));
605 if (dest->addr == daddr &&
606 dest->port == dport &&
607 dest->vfwmark == svc->fwmark &&
608 dest->protocol == svc->protocol &&
609 (svc->fwmark ||
610 (dest->vaddr == svc->addr &&
611 dest->vport == svc->port))) {
612 /* HIT */
613 return dest;
614 }
615
616 /*
617 * Try to purge the destination from trash if not referenced
618 */
619 if (atomic_read(&dest->refcnt) == 1) {
620 IP_VS_DBG(3, "Removing destination %u/%u.%u.%u.%u:%u "
621 "from trash\n",
622 dest->vfwmark,
623 NIPQUAD(dest->addr), ntohs(dest->port));
624 list_del(&dest->n_list);
625 ip_vs_dst_reset(dest);
626 __ip_vs_unbind_svc(dest);
627 kfree(dest);
628 }
629 }
630
631 return NULL;
632}
633
634
635/*
636 * Clean up all the destinations in the trash
637 * Called by the ip_vs_control_cleanup()
638 *
639 * When the ip_vs_control_clearup is activated by ipvs module exit,
640 * the service tables must have been flushed and all the connections
641 * are expired, and the refcnt of each destination in the trash must
642 * be 1, so we simply release them here.
643 */
644static void ip_vs_trash_cleanup(void)
645{
646 struct ip_vs_dest *dest, *nxt;
647
648 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
649 list_del(&dest->n_list);
650 ip_vs_dst_reset(dest);
651 __ip_vs_unbind_svc(dest);
652 kfree(dest);
653 }
654}
655
656
657static void
658ip_vs_zero_stats(struct ip_vs_stats *stats)
659{
660 spin_lock_bh(&stats->lock);
661 memset(stats, 0, (char *)&stats->lock - (char *)stats);
662 spin_unlock_bh(&stats->lock);
663 ip_vs_zero_estimator(stats);
664}
665
666/*
667 * Update a destination in the given service
668 */
669static void
670__ip_vs_update_dest(struct ip_vs_service *svc,
671 struct ip_vs_dest *dest, struct ip_vs_dest_user *udest)
672{
673 int conn_flags;
674
675 /* set the weight and the flags */
676 atomic_set(&dest->weight, udest->weight);
677 conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
678
679 /* check if local node and update the flags */
680 if (inet_addr_type(udest->addr) == RTN_LOCAL) {
681 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
682 | IP_VS_CONN_F_LOCALNODE;
683 }
684
685 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
686 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
687 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
688 } else {
689 /*
690 * Put the real service in ip_vs_rtable if not present.
691 * For now only for NAT!
692 */
693 write_lock_bh(&__ip_vs_rs_lock);
694 ip_vs_rs_hash(dest);
695 write_unlock_bh(&__ip_vs_rs_lock);
696 }
697 atomic_set(&dest->conn_flags, conn_flags);
698
699 /* bind the service */
700 if (!dest->svc) {
701 __ip_vs_bind_svc(dest, svc);
702 } else {
703 if (dest->svc != svc) {
704 __ip_vs_unbind_svc(dest);
705 ip_vs_zero_stats(&dest->stats);
706 __ip_vs_bind_svc(dest, svc);
707 }
708 }
709
710 /* set the dest status flags */
711 dest->flags |= IP_VS_DEST_F_AVAILABLE;
712
713 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
714 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
715 dest->u_threshold = udest->u_threshold;
716 dest->l_threshold = udest->l_threshold;
717}
718
719
720/*
721 * Create a destination for the given service
722 */
723static int
724ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest,
725 struct ip_vs_dest **dest_p)
726{
727 struct ip_vs_dest *dest;
728 unsigned atype;
729
730 EnterFunction(2);
731
732 atype = inet_addr_type(udest->addr);
733 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
734 return -EINVAL;
735
736 dest = kmalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
737 if (dest == NULL) {
738 IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
739 return -ENOMEM;
740 }
741 memset(dest, 0, sizeof(struct ip_vs_dest));
742
743 dest->protocol = svc->protocol;
744 dest->vaddr = svc->addr;
745 dest->vport = svc->port;
746 dest->vfwmark = svc->fwmark;
747 dest->addr = udest->addr;
748 dest->port = udest->port;
749
750 atomic_set(&dest->activeconns, 0);
751 atomic_set(&dest->inactconns, 0);
752 atomic_set(&dest->persistconns, 0);
753 atomic_set(&dest->refcnt, 0);
754
755 INIT_LIST_HEAD(&dest->d_list);
756 spin_lock_init(&dest->dst_lock);
757 spin_lock_init(&dest->stats.lock);
758 __ip_vs_update_dest(svc, dest, udest);
759 ip_vs_new_estimator(&dest->stats);
760
761 *dest_p = dest;
762
763 LeaveFunction(2);
764 return 0;
765}
766
767
768/*
769 * Add a destination into an existing service
770 */
771static int
772ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
773{
774 struct ip_vs_dest *dest;
775 __u32 daddr = udest->addr;
776 __u16 dport = udest->port;
777 int ret;
778
779 EnterFunction(2);
780
781 if (udest->weight < 0) {
782 IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
783 return -ERANGE;
784 }
785
786 if (udest->l_threshold > udest->u_threshold) {
787 IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than "
788 "upper threshold\n");
789 return -ERANGE;
790 }
791
792 /*
793 * Check if the dest already exists in the list
794 */
795 dest = ip_vs_lookup_dest(svc, daddr, dport);
796 if (dest != NULL) {
797 IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
798 return -EEXIST;
799 }
800
801 /*
802 * Check if the dest already exists in the trash and
803 * is from the same service
804 */
805 dest = ip_vs_trash_get_dest(svc, daddr, dport);
806 if (dest != NULL) {
807 IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
4b5bdf5c 808 "dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
1da177e4
LT
809 NIPQUAD(daddr), ntohs(dport),
810 atomic_read(&dest->refcnt),
811 dest->vfwmark,
812 NIPQUAD(dest->vaddr),
813 ntohs(dest->vport));
814 __ip_vs_update_dest(svc, dest, udest);
815
816 /*
817 * Get the destination from the trash
818 */
819 list_del(&dest->n_list);
820
821 ip_vs_new_estimator(&dest->stats);
822
823 write_lock_bh(&__ip_vs_svc_lock);
824
825 /*
826 * Wait until all other svc users go away.
827 */
828 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
829
830 list_add(&dest->n_list, &svc->destinations);
831 svc->num_dests++;
832
833 /* call the update_service function of its scheduler */
834 svc->scheduler->update_service(svc);
835
836 write_unlock_bh(&__ip_vs_svc_lock);
837 return 0;
838 }
839
840 /*
841 * Allocate and initialize the dest structure
842 */
843 ret = ip_vs_new_dest(svc, udest, &dest);
844 if (ret) {
845 return ret;
846 }
847
848 /*
849 * Add the dest entry into the list
850 */
851 atomic_inc(&dest->refcnt);
852
853 write_lock_bh(&__ip_vs_svc_lock);
854
855 /*
856 * Wait until all other svc users go away.
857 */
858 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
859
860 list_add(&dest->n_list, &svc->destinations);
861 svc->num_dests++;
862
863 /* call the update_service function of its scheduler */
864 svc->scheduler->update_service(svc);
865
866 write_unlock_bh(&__ip_vs_svc_lock);
867
868 LeaveFunction(2);
869
870 return 0;
871}
872
873
874/*
875 * Edit a destination in the given service
876 */
877static int
878ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
879{
880 struct ip_vs_dest *dest;
881 __u32 daddr = udest->addr;
882 __u16 dport = udest->port;
883
884 EnterFunction(2);
885
886 if (udest->weight < 0) {
887 IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
888 return -ERANGE;
889 }
890
891 if (udest->l_threshold > udest->u_threshold) {
892 IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than "
893 "upper threshold\n");
894 return -ERANGE;
895 }
896
897 /*
898 * Lookup the destination list
899 */
900 dest = ip_vs_lookup_dest(svc, daddr, dport);
901 if (dest == NULL) {
902 IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
903 return -ENOENT;
904 }
905
906 __ip_vs_update_dest(svc, dest, udest);
907
908 write_lock_bh(&__ip_vs_svc_lock);
909
910 /* Wait until all other svc users go away */
911 while (atomic_read(&svc->usecnt) > 1) {};
912
913 /* call the update_service, because server weight may be changed */
914 svc->scheduler->update_service(svc);
915
916 write_unlock_bh(&__ip_vs_svc_lock);
917
918 LeaveFunction(2);
919
920 return 0;
921}
922
923
924/*
925 * Delete a destination (must be already unlinked from the service)
926 */
927static void __ip_vs_del_dest(struct ip_vs_dest *dest)
928{
929 ip_vs_kill_estimator(&dest->stats);
930
931 /*
932 * Remove it from the d-linked list with the real services.
933 */
934 write_lock_bh(&__ip_vs_rs_lock);
935 ip_vs_rs_unhash(dest);
936 write_unlock_bh(&__ip_vs_rs_lock);
937
938 /*
939 * Decrease the refcnt of the dest, and free the dest
940 * if nobody refers to it (refcnt=0). Otherwise, throw
941 * the destination into the trash.
942 */
943 if (atomic_dec_and_test(&dest->refcnt)) {
944 ip_vs_dst_reset(dest);
945 /* simply decrease svc->refcnt here, let the caller check
946 and release the service if nobody refers to it.
947 Only user context can release destination and service,
948 and only one user context can update virtual service at a
949 time, so the operation here is OK */
950 atomic_dec(&dest->svc->refcnt);
951 kfree(dest);
952 } else {
4b5bdf5c
RN
953 IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, "
954 "dest->refcnt=%d\n",
1da177e4
LT
955 NIPQUAD(dest->addr), ntohs(dest->port),
956 atomic_read(&dest->refcnt));
957 list_add(&dest->n_list, &ip_vs_dest_trash);
958 atomic_inc(&dest->refcnt);
959 }
960}
961
962
963/*
964 * Unlink a destination from the given service
965 */
966static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
967 struct ip_vs_dest *dest,
968 int svcupd)
969{
970 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
971
972 /*
973 * Remove it from the d-linked destination list.
974 */
975 list_del(&dest->n_list);
976 svc->num_dests--;
977 if (svcupd) {
978 /*
979 * Call the update_service function of its scheduler
980 */
981 svc->scheduler->update_service(svc);
982 }
983}
984
985
986/*
987 * Delete a destination server in the given service
988 */
989static int
990ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_dest_user *udest)
991{
992 struct ip_vs_dest *dest;
993 __u32 daddr = udest->addr;
994 __u16 dport = udest->port;
995
996 EnterFunction(2);
997
998 dest = ip_vs_lookup_dest(svc, daddr, dport);
999 if (dest == NULL) {
1000 IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
1001 return -ENOENT;
1002 }
1003
1004 write_lock_bh(&__ip_vs_svc_lock);
1005
1006 /*
1007 * Wait until all other svc users go away.
1008 */
1009 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1010
1011 /*
1012 * Unlink dest from the service
1013 */
1014 __ip_vs_unlink_dest(svc, dest, 1);
1015
1016 write_unlock_bh(&__ip_vs_svc_lock);
1017
1018 /*
1019 * Delete the destination
1020 */
1021 __ip_vs_del_dest(dest);
1022
1023 LeaveFunction(2);
1024
1025 return 0;
1026}
1027
1028
1029/*
1030 * Add a service into the service hash table
1031 */
1032static int
1033ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p)
1034{
1035 int ret = 0;
1036 struct ip_vs_scheduler *sched = NULL;
1037 struct ip_vs_service *svc = NULL;
1038
1039 /* increase the module use count */
1040 ip_vs_use_count_inc();
1041
1042 /* Lookup the scheduler by 'u->sched_name' */
1043 sched = ip_vs_scheduler_get(u->sched_name);
1044 if (sched == NULL) {
1045 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1046 u->sched_name);
1047 ret = -ENOENT;
1048 goto out_mod_dec;
1049 }
1050
1051 svc = (struct ip_vs_service *)
1052 kmalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
1053 if (svc == NULL) {
1054 IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
1055 ret = -ENOMEM;
1056 goto out_err;
1057 }
1058 memset(svc, 0, sizeof(struct ip_vs_service));
1059
1060 /* I'm the first user of the service */
1061 atomic_set(&svc->usecnt, 1);
1062 atomic_set(&svc->refcnt, 0);
1063
1064 svc->protocol = u->protocol;
1065 svc->addr = u->addr;
1066 svc->port = u->port;
1067 svc->fwmark = u->fwmark;
1068 svc->flags = u->flags;
1069 svc->timeout = u->timeout * HZ;
1070 svc->netmask = u->netmask;
1071
1072 INIT_LIST_HEAD(&svc->destinations);
1073 rwlock_init(&svc->sched_lock);
1074 spin_lock_init(&svc->stats.lock);
1075
1076 /* Bind the scheduler */
1077 ret = ip_vs_bind_scheduler(svc, sched);
1078 if (ret)
1079 goto out_err;
1080 sched = NULL;
1081
1082 /* Update the virtual service counters */
1083 if (svc->port == FTPPORT)
1084 atomic_inc(&ip_vs_ftpsvc_counter);
1085 else if (svc->port == 0)
1086 atomic_inc(&ip_vs_nullsvc_counter);
1087
1088 ip_vs_new_estimator(&svc->stats);
1089 ip_vs_num_services++;
1090
1091 /* Hash the service into the service table */
1092 write_lock_bh(&__ip_vs_svc_lock);
1093 ip_vs_svc_hash(svc);
1094 write_unlock_bh(&__ip_vs_svc_lock);
1095
1096 *svc_p = svc;
1097 return 0;
1098
1099 out_err:
1100 if (svc != NULL) {
1101 if (svc->scheduler)
1102 ip_vs_unbind_scheduler(svc);
1103 if (svc->inc) {
1104 local_bh_disable();
1105 ip_vs_app_inc_put(svc->inc);
1106 local_bh_enable();
1107 }
1108 kfree(svc);
1109 }
1110 ip_vs_scheduler_put(sched);
1111
1112 out_mod_dec:
1113 /* decrease the module use count */
1114 ip_vs_use_count_dec();
1115
1116 return ret;
1117}
1118
1119
1120/*
1121 * Edit a service and bind it with a new scheduler
1122 */
1123static int
1124ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user *u)
1125{
1126 struct ip_vs_scheduler *sched, *old_sched;
1127 int ret = 0;
1128
1129 /*
1130 * Lookup the scheduler, by 'u->sched_name'
1131 */
1132 sched = ip_vs_scheduler_get(u->sched_name);
1133 if (sched == NULL) {
1134 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1135 u->sched_name);
1136 return -ENOENT;
1137 }
1138 old_sched = sched;
1139
1140 write_lock_bh(&__ip_vs_svc_lock);
1141
1142 /*
1143 * Wait until all other svc users go away.
1144 */
1145 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1146
1147 /*
1148 * Set the flags and timeout value
1149 */
1150 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1151 svc->timeout = u->timeout * HZ;
1152 svc->netmask = u->netmask;
1153
1154 old_sched = svc->scheduler;
1155 if (sched != old_sched) {
1156 /*
1157 * Unbind the old scheduler
1158 */
1159 if ((ret = ip_vs_unbind_scheduler(svc))) {
1160 old_sched = sched;
1161 goto out;
1162 }
1163
1164 /*
1165 * Bind the new scheduler
1166 */
1167 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1168 /*
1169 * If ip_vs_bind_scheduler fails, restore the old
1170 * scheduler.
1171 * The main reason of failure is out of memory.
1172 *
1173 * The question is if the old scheduler can be
1174 * restored all the time. TODO: if it cannot be
1175 * restored some time, we must delete the service,
1176 * otherwise the system may crash.
1177 */
1178 ip_vs_bind_scheduler(svc, old_sched);
1179 old_sched = sched;
1180 goto out;
1181 }
1182 }
1183
1184 out:
1185 write_unlock_bh(&__ip_vs_svc_lock);
1186
1187 if (old_sched)
1188 ip_vs_scheduler_put(old_sched);
1189
1190 return ret;
1191}
1192
1193
1194/*
1195 * Delete a service from the service list
1196 * - The service must be unlinked, unlocked and not referenced!
1197 * - We are called under _bh lock
1198 */
1199static void __ip_vs_del_service(struct ip_vs_service *svc)
1200{
1201 struct ip_vs_dest *dest, *nxt;
1202 struct ip_vs_scheduler *old_sched;
1203
1204 ip_vs_num_services--;
1205 ip_vs_kill_estimator(&svc->stats);
1206
1207 /* Unbind scheduler */
1208 old_sched = svc->scheduler;
1209 ip_vs_unbind_scheduler(svc);
1210 if (old_sched)
1211 ip_vs_scheduler_put(old_sched);
1212
1213 /* Unbind app inc */
1214 if (svc->inc) {
1215 ip_vs_app_inc_put(svc->inc);
1216 svc->inc = NULL;
1217 }
1218
1219 /*
1220 * Unlink the whole destination list
1221 */
1222 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1223 __ip_vs_unlink_dest(svc, dest, 0);
1224 __ip_vs_del_dest(dest);
1225 }
1226
1227 /*
1228 * Update the virtual service counters
1229 */
1230 if (svc->port == FTPPORT)
1231 atomic_dec(&ip_vs_ftpsvc_counter);
1232 else if (svc->port == 0)
1233 atomic_dec(&ip_vs_nullsvc_counter);
1234
1235 /*
1236 * Free the service if nobody refers to it
1237 */
1238 if (atomic_read(&svc->refcnt) == 0)
1239 kfree(svc);
1240
1241 /* decrease the module use count */
1242 ip_vs_use_count_dec();
1243}
1244
/*
 * Delete a service from the service list
 *
 * Unhashes the service so no new users can find it, waits for existing
 * users to drain, then destroys it under the service write lock.
 */
static int ip_vs_del_service(struct ip_vs_service *svc)
{
	if (svc == NULL)
		return -EEXIST;	/* NOTE(review): -ESRCH would read more
				 * naturally, but callers/userspace may
				 * depend on this historical value */

	/*
	 * Unhash it from the service table
	 */
	write_lock_bh(&__ip_vs_svc_lock);

	ip_vs_svc_unhash(svc);

	/*
	 * Wait until all the svc users go away.
	 * (> 1 because the caller itself still holds one usecnt ref)
	 */
	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);

	__ip_vs_del_service(svc);

	write_unlock_bh(&__ip_vs_svc_lock);

	return 0;
}
1271
1272
1273/*
1274 * Flush all the virtual services
1275 */
1276static int ip_vs_flush(void)
1277{
1278 int idx;
1279 struct ip_vs_service *svc, *nxt;
1280
1281 /*
1282 * Flush the service table hashed by <protocol,addr,port>
1283 */
1284 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1285 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1286 write_lock_bh(&__ip_vs_svc_lock);
1287 ip_vs_svc_unhash(svc);
1288 /*
1289 * Wait until all the svc users go away.
1290 */
1291 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1292 __ip_vs_del_service(svc);
1293 write_unlock_bh(&__ip_vs_svc_lock);
1294 }
1295 }
1296
1297 /*
1298 * Flush the service table hashed by fwmark
1299 */
1300 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1301 list_for_each_entry_safe(svc, nxt,
1302 &ip_vs_svc_fwm_table[idx], f_list) {
1303 write_lock_bh(&__ip_vs_svc_lock);
1304 ip_vs_svc_unhash(svc);
1305 /*
1306 * Wait until all the svc users go away.
1307 */
1308 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1309 __ip_vs_del_service(svc);
1310 write_unlock_bh(&__ip_vs_svc_lock);
1311 }
1312 }
1313
1314 return 0;
1315}
1316
1317
1318/*
1319 * Zero counters in a service or all services
1320 */
1321static int ip_vs_zero_service(struct ip_vs_service *svc)
1322{
1323 struct ip_vs_dest *dest;
1324
1325 write_lock_bh(&__ip_vs_svc_lock);
1326 list_for_each_entry(dest, &svc->destinations, n_list) {
1327 ip_vs_zero_stats(&dest->stats);
1328 }
1329 ip_vs_zero_stats(&svc->stats);
1330 write_unlock_bh(&__ip_vs_svc_lock);
1331 return 0;
1332}
1333
1334static int ip_vs_zero_all(void)
1335{
1336 int idx;
1337 struct ip_vs_service *svc;
1338
1339 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1340 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1341 ip_vs_zero_service(svc);
1342 }
1343 }
1344
1345 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1346 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1347 ip_vs_zero_service(svc);
1348 }
1349 }
1350
1351 ip_vs_zero_stats(&ip_vs_stats);
1352 return 0;
1353}
1354
1355
1356static int
1357proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
1358 void __user *buffer, size_t *lenp, loff_t *ppos)
1359{
1360 int *valp = table->data;
1361 int val = *valp;
1362 int rc;
1363
1364 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1365 if (write && (*valp != val)) {
1366 if ((*valp < 0) || (*valp > 3)) {
1367 /* Restore the correct value */
1368 *valp = val;
1369 } else {
1da177e4 1370 update_defense_level();
1da177e4
LT
1371 }
1372 }
1373 return rc;
1374}
1375
1376
/*
 * sysctl handler for sync_threshold, a pair of ints (threshold, period).
 * A written pair is rejected (rolled back to the previous pair) unless
 * both values are non-negative and threshold < period.
 *
 * NOTE(review): the backup/restore is not serialized against the sync
 * daemon reading these values — presumably benign for two ints, but
 * worth confirming.
 */
static int
proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
		       void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int *valp = table->data;
	int val[2];
	int rc;

	/* backup the value first */
	memcpy(val, valp, sizeof(val));

	rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
	if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
		/* Restore the correct value */
		memcpy(valp, val, sizeof(val));
	}
	return rc;
}
1395
1396
/*
 *	IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
 *
 *	Plain tunables use proc_dointvec; the defense knobs use
 *	proc_do_defense_mode for range checking, and sync_threshold
 *	uses proc_do_sync_threshold for pair validation.
 */

static struct ctl_table vs_vars[] = {
	{
		.ctl_name	= NET_IPV4_VS_AMEMTHRESH,
		.procname	= "amemthresh",
		.data		= &sysctl_ip_vs_amemthresh,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
#ifdef CONFIG_IP_VS_DEBUG
	{
		.ctl_name	= NET_IPV4_VS_DEBUG_LEVEL,
		.procname	= "debug_level",
		.data		= &sysctl_ip_vs_debug_level,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
#endif
	{
		.ctl_name	= NET_IPV4_VS_AMDROPRATE,
		.procname	= "am_droprate",
		.data		= &sysctl_ip_vs_am_droprate,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= NET_IPV4_VS_DROP_ENTRY,
		.procname	= "drop_entry",
		.data		= &sysctl_ip_vs_drop_entry,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_do_defense_mode,
	},
	{
		.ctl_name	= NET_IPV4_VS_DROP_PACKET,
		.procname	= "drop_packet",
		.data		= &sysctl_ip_vs_drop_packet,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_do_defense_mode,
	},
	{
		.ctl_name	= NET_IPV4_VS_SECURE_TCP,
		.procname	= "secure_tcp",
		.data		= &sysctl_ip_vs_secure_tcp,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_do_defense_mode,
	},
#if 0
	/* Dead code: per-state DoS timeout knobs, kept disabled. */
	{
		.ctl_name	= NET_IPV4_VS_TO_ES,
		.procname	= "timeout_established",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_VS_TO_SS,
		.procname	= "timeout_synsent",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_VS_TO_SR,
		.procname	= "timeout_synrecv",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_VS_TO_FW,
		.procname	= "timeout_finwait",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_VS_TO_TW,
		.procname	= "timeout_timewait",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_VS_TO_CL,
		.procname	= "timeout_close",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_VS_TO_CW,
		.procname	= "timeout_closewait",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_VS_TO_LA,
		.procname	= "timeout_lastack",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_VS_TO_LI,
		.procname	= "timeout_listen",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_VS_TO_SA,
		.procname	= "timeout_synack",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_VS_TO_UDP,
		.procname	= "timeout_udp",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_VS_TO_ICMP,
		.procname	= "timeout_icmp",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
#endif
	{
		.ctl_name	= NET_IPV4_VS_CACHE_BYPASS,
		.procname	= "cache_bypass",
		.data		= &sysctl_ip_vs_cache_bypass,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= NET_IPV4_VS_EXPIRE_NODEST_CONN,
		.procname	= "expire_nodest_conn",
		.data		= &sysctl_ip_vs_expire_nodest_conn,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= NET_IPV4_VS_EXPIRE_QUIESCENT_TEMPLATE,
		.procname	= "expire_quiescent_template",
		.data		= &sysctl_ip_vs_expire_quiescent_template,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= NET_IPV4_VS_SYNC_THRESHOLD,
		.procname	= "sync_threshold",
		.data		= &sysctl_ip_vs_sync_threshold,
		/* a pair of ints, hence sizeof the whole array */
		.maxlen		= sizeof(sysctl_ip_vs_sync_threshold),
		.mode		= 0644,
		.proc_handler	= &proc_do_sync_threshold,
	},
	{
		.ctl_name	= NET_IPV4_VS_NAT_ICMP_SEND,
		.procname	= "nat_icmp_send",
		.data		= &sysctl_ip_vs_nat_icmp_send,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{ .ctl_name = 0 }	/* sentinel */
};
1592
/* "vs" directory level of the sysctl tree; holds vs_vars. */
static ctl_table vs_table[] = {
	{
		.ctl_name	= NET_IPV4_VS,
		.procname	= "vs",
		.mode		= 0555,
		.child		= vs_vars
	},
	{ .ctl_name = 0 }	/* sentinel */
};
1602
bf0ff9e5 1603static ctl_table ipvs_ipv4_table[] = {
1da177e4
LT
1604 {
1605 .ctl_name = NET_IPV4,
1606 .procname = "ipv4",
1607 .mode = 0555,
1608 .child = vs_table,
1609 },
1610 { .ctl_name = 0 }
1611};
1612
1613static ctl_table vs_root_table[] = {
1614 {
1615 .ctl_name = CTL_NET,
1616 .procname = "net",
1617 .mode = 0555,
bf0ff9e5 1618 .child = ipvs_ipv4_table,
1da177e4
LT
1619 },
1620 { .ctl_name = 0 }
1621};
1622
1623static struct ctl_table_header * sysctl_header;
1624
1625#ifdef CONFIG_PROC_FS
1626
/* Cursor for the /proc/net/ip_vs seq_file walk over both hash tables. */
struct ip_vs_iter {
	struct list_head *table;	/* ip_vs_svc_table or ip_vs_svc_fwm_table */
	int bucket;			/* current bucket index in that table */
};
1631
1632/*
1633 * Write the contents of the VS rule table to a PROCfs file.
1634 * (It is kept just for backward compatibility)
1635 */
1636static inline const char *ip_vs_fwd_name(unsigned flags)
1637{
1638 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1639 case IP_VS_CONN_F_LOCALNODE:
1640 return "Local";
1641 case IP_VS_CONN_F_TUNNEL:
1642 return "Tunnel";
1643 case IP_VS_CONN_F_DROUTE:
1644 return "Route";
1645 default:
1646 return "Masq";
1647 }
1648}
1649
1650
/* Get the Nth entry in the two lists
 *
 * Counts down 'pos' while scanning the <protocol,addr,port> table and
 * then the fwmark table; records the table/bucket reached in the seq
 * iterator so ip_vs_info_seq_next() can resume.  Returns NULL when pos
 * is past the end.  Caller holds __ip_vs_svc_lock for read.
 */
static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
{
	struct ip_vs_iter *iter = seq->private;
	int idx;
	struct ip_vs_service *svc;

	/* look in hash by protocol */
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
			if (pos-- == 0){
				iter->table = ip_vs_svc_table;
				iter->bucket = idx;
				return svc;
			}
		}
	}

	/* keep looking in fwmark */
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
			if (pos-- == 0) {
				iter->table = ip_vs_svc_fwm_table;
				iter->bucket = idx;
				return svc;
			}
		}
	}

	return NULL;
}
1682
/*
 * seq_file .start: take the service read lock (released in .stop) and
 * return SEQ_START_TOKEN for the header line at position 0, else the
 * (*pos - 1)th service.
 */
static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
{

	read_lock_bh(&__ip_vs_svc_lock);
	return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
}
1689
1690
/*
 * seq_file .next: advance to the next service, continuing within the
 * current bucket, then through later buckets of the current table, and
 * finally falling through from the protocol table to the fwmark table.
 * Returns NULL at the end.  Runs under the read lock taken in .start.
 */
static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct list_head *e;
	struct ip_vs_iter *iter;
	struct ip_vs_service *svc;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ip_vs_info_array(seq,0);

	svc = v;
	iter = seq->private;

	if (iter->table == ip_vs_svc_table) {
		/* next service in table hashed by protocol */
		if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
			return list_entry(e, struct ip_vs_service, s_list);


		/* first entry of the next non-empty bucket, if any */
		while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
			list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
					    s_list) {
				return svc;
			}
		}

		/* protocol table exhausted: switch to the fwmark table */
		iter->table = ip_vs_svc_fwm_table;
		iter->bucket = -1;
		goto scan_fwmark;
	}

	/* next service in hashed by fwmark */
	if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
		return list_entry(e, struct ip_vs_service, f_list);

 scan_fwmark:
	while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
		list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
				    f_list)
			return svc;
	}

	return NULL;
}
1735
/* seq_file .stop: release the lock taken by ip_vs_info_seq_start(). */
static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
{
	read_unlock_bh(&__ip_vs_svc_lock);
}
1740
1741
/*
 * seq_file .show: print the three header lines for SEQ_START_TOKEN,
 * otherwise one virtual service line (address form for the protocol
 * table, "FWM" form for the fwmark table) followed by one line per
 * real server.  Output format is ABI for ipvsadm — do not change it.
 */
static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN) {
		seq_printf(seq,
			"IP Virtual Server version %d.%d.%d (size=%d)\n",
			NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
		seq_puts(seq,
			 "Prot LocalAddress:Port Scheduler Flags\n");
		seq_puts(seq,
			 "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
	} else {
		const struct ip_vs_service *svc = v;
		const struct ip_vs_iter *iter = seq->private;
		const struct ip_vs_dest *dest;

		if (iter->table == ip_vs_svc_table)
			seq_printf(seq, "%s  %08X:%04X %s ",
				   ip_vs_proto_name(svc->protocol),
				   ntohl(svc->addr),
				   ntohs(svc->port),
				   svc->scheduler->name);
		else
			seq_printf(seq, "FWM  %08X %s ",
				   svc->fwmark, svc->scheduler->name);

		if (svc->flags & IP_VS_SVC_F_PERSISTENT)
			seq_printf(seq, "persistent %d %08X\n",
				svc->timeout,
				ntohl(svc->netmask));
		else
			seq_putc(seq, '\n');

		list_for_each_entry(dest, &svc->destinations, n_list) {
			seq_printf(seq,
				   "  -> %08X:%04X      %-7s %-6d %-10d %-10d\n",
				   ntohl(dest->addr), ntohs(dest->port),
				   ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
				   atomic_read(&dest->weight),
				   atomic_read(&dest->activeconns),
				   atomic_read(&dest->inactconns));
		}
	}
	return 0;
}
1786
/* seq_file operations for /proc/net/ip_vs */
static struct seq_operations ip_vs_info_seq_ops = {
	.start = ip_vs_info_seq_start,
	.next  = ip_vs_info_seq_next,
	.stop  = ip_vs_info_seq_stop,
	.show  = ip_vs_info_seq_show,
};
1793
1794static int ip_vs_info_open(struct inode *inode, struct file *file)
1795{
1796 struct seq_file *seq;
1797 int rc = -ENOMEM;
1798 struct ip_vs_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
1799
1800 if (!s)
1801 goto out;
1802
1803 rc = seq_open(file, &ip_vs_info_seq_ops);
1804 if (rc)
1805 goto out_kfree;
1806
1807 seq = file->private_data;
1808 seq->private = s;
1809 memset(s, 0, sizeof(*s));
1810out:
1811 return rc;
1812out_kfree:
1813 kfree(s);
1814 goto out;
1815}
1816
/* file operations for /proc/net/ip_vs */
static struct file_operations ip_vs_info_fops = {
	.owner	 = THIS_MODULE,
	.open    = ip_vs_info_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	/* also frees the ip_vs_iter allocated in ip_vs_info_open() */
	.release = seq_release_private,
};
1824
1825#endif
1826
1827struct ip_vs_stats ip_vs_stats;
1828
1829#ifdef CONFIG_PROC_FS
/*
 * show() for /proc/net/ip_vs_stats: dump the global IPVS totals and
 * per-second rates under the stats spinlock.  Column layout is ABI
 * for userspace tools — do not change the format strings.
 */
static int ip_vs_stats_show(struct seq_file *seq, void *v)
{

/*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
	seq_puts(seq,
		 "   Total Incoming Outgoing         Incoming         Outgoing\n");
	seq_printf(seq,
		   "   Conns  Packets  Packets            Bytes            Bytes\n");

	spin_lock_bh(&ip_vs_stats.lock);
	seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.conns,
		   ip_vs_stats.inpkts, ip_vs_stats.outpkts,
		   (unsigned long long) ip_vs_stats.inbytes,
		   (unsigned long long) ip_vs_stats.outbytes);

/*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
	seq_puts(seq,
		   " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
	seq_printf(seq,"%8X %8X %8X %16X %16X\n",
			ip_vs_stats.cps,
			ip_vs_stats.inpps,
			ip_vs_stats.outpps,
			ip_vs_stats.inbps,
			ip_vs_stats.outbps);
	spin_unlock_bh(&ip_vs_stats.lock);

	return 0;
}
1858
/* open() for /proc/net/ip_vs_stats: single-shot seq_file. */
static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open(file, ip_vs_stats_show, NULL);
}
1863
/* file operations for /proc/net/ip_vs_stats */
static struct file_operations ip_vs_stats_fops = {
	.owner = THIS_MODULE,
	.open = ip_vs_stats_seq_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
1871
1872#endif
1873
/*
 *	Set timeout values for tcp tcpfin udp in the timeout_table.
 *
 *	Values arrive from userspace in seconds and are stored in
 *	jiffies.  A zero field means "leave that timeout unchanged".
 *	Always returns 0.
 */
static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
{
	IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
		  u->tcp_timeout,
		  u->tcp_fin_timeout,
		  u->udp_timeout);

#ifdef CONFIG_IP_VS_PROTO_TCP
	if (u->tcp_timeout) {
		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
			= u->tcp_timeout * HZ;
	}

	if (u->tcp_fin_timeout) {
		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
			= u->tcp_fin_timeout * HZ;
	}
#endif

#ifdef CONFIG_IP_VS_PROTO_UDP
	if (u->udp_timeout) {
		ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
			= u->udp_timeout * HZ;
	}
#endif
	return 0;
}
1904
1905
/* Map a setsockopt command to a 0-based index, and the expected
 * argument length for each command (0 = no argument). */
#define SET_CMDID(cmd)		(cmd - IP_VS_BASE_CTL)
#define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
				 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
#define MAX_ARG_LEN		SVCDEST_ARG_LEN
1913
9b5b5cff 1914static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
1da177e4
LT
1915 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
1916 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
1917 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
1918 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
1919 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
1920 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
1921 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
1922 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
1923 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
1924 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
1925 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
1926};
1927
/*
 * setsockopt() entry point for all IP_VS_SO_SET_* commands.
 *
 * Requires CAP_NET_ADMIN.  Validates the argument length against
 * set_arglen[], copies the argument in, then dispatches: global
 * commands (flush, timeouts, sync daemon start/stop) are handled
 * before the per-service lookup; the rest operate on one service
 * found by <protocol,addr,port> or fwmark.  Everything runs under
 * __ip_vs_mutex; the module use count is pinned for the duration.
 */
static int
do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
{
	int ret;
	unsigned char arg[MAX_ARG_LEN];
	struct ip_vs_service_user *usvc;
	struct ip_vs_service *svc;
	struct ip_vs_dest_user *udest;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	/* reject any argument whose size does not match the command */
	if (len != set_arglen[SET_CMDID(cmd)]) {
		IP_VS_ERR("set_ctl: len %u != %u\n",
			  len, set_arglen[SET_CMDID(cmd)]);
		return -EINVAL;
	}

	if (copy_from_user(arg, user, len) != 0)
		return -EFAULT;

	/* increase the module use count */
	ip_vs_use_count_inc();

	if (down_interruptible(&__ip_vs_mutex)) {
		ret = -ERESTARTSYS;
		goto out_dec;
	}

	/* global commands that need no service lookup */
	if (cmd == IP_VS_SO_SET_FLUSH) {
		/* Flush the virtual service */
		ret = ip_vs_flush();
		goto out_unlock;
	} else if (cmd == IP_VS_SO_SET_TIMEOUT) {
		/* Set timeout values for (tcp tcpfin udp) */
		ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
		goto out_unlock;
	} else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
		struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
		ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
		goto out_unlock;
	} else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
		struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
		ret = stop_sync_thread(dm->state);
		goto out_unlock;
	}

	/* arg layout: service descriptor, then (for *DEST) a dest one */
	usvc = (struct ip_vs_service_user *)arg;
	udest = (struct ip_vs_dest_user *)(usvc + 1);

	if (cmd == IP_VS_SO_SET_ZERO) {
		/* if no service address is set, zero counters in all */
		if (!usvc->fwmark && !usvc->addr && !usvc->port) {
			ret = ip_vs_zero_all();
			goto out_unlock;
		}
	}

	/* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
	if (usvc->protocol!=IPPROTO_TCP && usvc->protocol!=IPPROTO_UDP) {
		IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
			  usvc->protocol, NIPQUAD(usvc->addr),
			  ntohs(usvc->port), usvc->sched_name);
		/* NOTE(review): -EINVAL would fit better, but -EFAULT is
		 * the historical return here */
		ret = -EFAULT;
		goto out_unlock;
	}

	/* Lookup the exact service by <protocol, addr, port> or fwmark */
	if (usvc->fwmark == 0)
		svc = __ip_vs_service_get(usvc->protocol,
					  usvc->addr, usvc->port);
	else
		svc = __ip_vs_svc_fwm_get(usvc->fwmark);

	/* every command except ADD requires an existing, matching service */
	if (cmd != IP_VS_SO_SET_ADD
	    && (svc == NULL || svc->protocol != usvc->protocol)) {
		ret = -ESRCH;
		goto out_unlock;
	}

	switch (cmd) {
	case IP_VS_SO_SET_ADD:
		if (svc != NULL)
			ret = -EEXIST;
		else
			ret = ip_vs_add_service(usvc, &svc);
		break;
	case IP_VS_SO_SET_EDIT:
		ret = ip_vs_edit_service(svc, usvc);
		break;
	case IP_VS_SO_SET_DEL:
		ret = ip_vs_del_service(svc);
		if (!ret)
			/* service is gone — must not ip_vs_service_put() it */
			goto out_unlock;
		break;
	case IP_VS_SO_SET_ZERO:
		ret = ip_vs_zero_service(svc);
		break;
	case IP_VS_SO_SET_ADDDEST:
		ret = ip_vs_add_dest(svc, udest);
		break;
	case IP_VS_SO_SET_EDITDEST:
		ret = ip_vs_edit_dest(svc, udest);
		break;
	case IP_VS_SO_SET_DELDEST:
		ret = ip_vs_del_dest(svc, udest);
		break;
	default:
		ret = -EINVAL;
	}

	/* drop the usecnt reference taken by the lookup (or by ADD) */
	if (svc)
		ip_vs_service_put(svc);

  out_unlock:
	up(&__ip_vs_mutex);
  out_dec:
	/* decrease the module use count */
	ip_vs_use_count_dec();

	return ret;
}
2050
2051
/*
 * Snapshot the counters of an ip_vs_stats into the userspace layout.
 * Relies on ip_vs_stats beginning with the same counter fields as
 * ip_vs_stats_user, with 'lock' placed immediately after them — the
 * memcpy length is computed from the lock's offset.
 */
static void
ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
{
	spin_lock_bh(&src->lock);
	memcpy(dst, src, (char*)&src->lock - (char*)src);
	spin_unlock_bh(&src->lock);
}
2059
2060static void
2061ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2062{
2063 dst->protocol = src->protocol;
2064 dst->addr = src->addr;
2065 dst->port = src->port;
2066 dst->fwmark = src->fwmark;
4da62fc7 2067 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
1da177e4
LT
2068 dst->flags = src->flags;
2069 dst->timeout = src->timeout / HZ;
2070 dst->netmask = src->netmask;
2071 dst->num_dests = src->num_dests;
2072 ip_vs_copy_stats(&dst->stats, &src->stats);
2073}
2074
2075static inline int
2076__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2077 struct ip_vs_get_services __user *uptr)
2078{
2079 int idx, count=0;
2080 struct ip_vs_service *svc;
2081 struct ip_vs_service_entry entry;
2082 int ret = 0;
2083
2084 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2085 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2086 if (count >= get->num_services)
2087 goto out;
4da62fc7 2088 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2089 ip_vs_copy_service(&entry, svc);
2090 if (copy_to_user(&uptr->entrytable[count],
2091 &entry, sizeof(entry))) {
2092 ret = -EFAULT;
2093 goto out;
2094 }
2095 count++;
2096 }
2097 }
2098
2099 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2100 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2101 if (count >= get->num_services)
2102 goto out;
4da62fc7 2103 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2104 ip_vs_copy_service(&entry, svc);
2105 if (copy_to_user(&uptr->entrytable[count],
2106 &entry, sizeof(entry))) {
2107 ret = -EFAULT;
2108 goto out;
2109 }
2110 count++;
2111 }
2112 }
2113 out:
2114 return ret;
2115}
2116
2117static inline int
2118__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2119 struct ip_vs_get_dests __user *uptr)
2120{
2121 struct ip_vs_service *svc;
2122 int ret = 0;
2123
2124 if (get->fwmark)
2125 svc = __ip_vs_svc_fwm_get(get->fwmark);
2126 else
2127 svc = __ip_vs_service_get(get->protocol,
2128 get->addr, get->port);
2129 if (svc) {
2130 int count = 0;
2131 struct ip_vs_dest *dest;
2132 struct ip_vs_dest_entry entry;
2133
2134 list_for_each_entry(dest, &svc->destinations, n_list) {
2135 if (count >= get->num_dests)
2136 break;
2137
2138 entry.addr = dest->addr;
2139 entry.port = dest->port;
2140 entry.conn_flags = atomic_read(&dest->conn_flags);
2141 entry.weight = atomic_read(&dest->weight);
2142 entry.u_threshold = dest->u_threshold;
2143 entry.l_threshold = dest->l_threshold;
2144 entry.activeconns = atomic_read(&dest->activeconns);
2145 entry.inactconns = atomic_read(&dest->inactconns);
2146 entry.persistconns = atomic_read(&dest->persistconns);
2147 ip_vs_copy_stats(&entry.stats, &dest->stats);
2148 if (copy_to_user(&uptr->entrytable[count],
2149 &entry, sizeof(entry))) {
2150 ret = -EFAULT;
2151 break;
2152 }
2153 count++;
2154 }
2155 ip_vs_service_put(svc);
2156 } else
2157 ret = -ESRCH;
2158 return ret;
2159}
2160
2161static inline void
2162__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2163{
2164#ifdef CONFIG_IP_VS_PROTO_TCP
2165 u->tcp_timeout =
2166 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2167 u->tcp_fin_timeout =
2168 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2169#endif
2170#ifdef CONFIG_IP_VS_PROTO_UDP
2171 u->udp_timeout =
2172 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2173#endif
2174}
2175
2176
/* Map a getsockopt command to a 0-based index, and the minimum
 * argument length for each command. */
#define GET_CMDID(cmd)		(cmd - IP_VS_BASE_CTL)
#define GET_INFO_ARG_LEN	(sizeof(struct ip_vs_getinfo))
#define GET_SERVICES_ARG_LEN	(sizeof(struct ip_vs_get_services))
#define GET_SERVICE_ARG_LEN	(sizeof(struct ip_vs_service_entry))
#define GET_DESTS_ARG_LEN	(sizeof(struct ip_vs_get_dests))
#define GET_TIMEOUT_ARG_LEN	(sizeof(struct ip_vs_timeout_user))
#define GET_DAEMON_ARG_LEN	(sizeof(struct ip_vs_daemon_user) * 2)
2184
9b5b5cff 2185static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
1da177e4
LT
2186 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2187 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2188 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2189 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2190 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2191 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2192 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2193};
2194
2195static int
2196do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2197{
2198 unsigned char arg[128];
2199 int ret = 0;
2200
2201 if (!capable(CAP_NET_ADMIN))
2202 return -EPERM;
2203
2204 if (*len < get_arglen[GET_CMDID(cmd)]) {
2205 IP_VS_ERR("get_ctl: len %u < %u\n",
2206 *len, get_arglen[GET_CMDID(cmd)]);
2207 return -EINVAL;
2208 }
2209
2210 if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0)
2211 return -EFAULT;
2212
2213 if (down_interruptible(&__ip_vs_mutex))
2214 return -ERESTARTSYS;
2215
2216 switch (cmd) {
2217 case IP_VS_SO_GET_VERSION:
2218 {
2219 char buf[64];
2220
2221 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2222 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
2223 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2224 ret = -EFAULT;
2225 goto out;
2226 }
2227 *len = strlen(buf)+1;
2228 }
2229 break;
2230
2231 case IP_VS_SO_GET_INFO:
2232 {
2233 struct ip_vs_getinfo info;
2234 info.version = IP_VS_VERSION_CODE;
2235 info.size = IP_VS_CONN_TAB_SIZE;
2236 info.num_services = ip_vs_num_services;
2237 if (copy_to_user(user, &info, sizeof(info)) != 0)
2238 ret = -EFAULT;
2239 }
2240 break;
2241
2242 case IP_VS_SO_GET_SERVICES:
2243 {
2244 struct ip_vs_get_services *get;
2245 int size;
2246
2247 get = (struct ip_vs_get_services *)arg;
2248 size = sizeof(*get) +
2249 sizeof(struct ip_vs_service_entry) * get->num_services;
2250 if (*len != size) {
2251 IP_VS_ERR("length: %u != %u\n", *len, size);
2252 ret = -EINVAL;
2253 goto out;
2254 }
2255 ret = __ip_vs_get_service_entries(get, user);
2256 }
2257 break;
2258
2259 case IP_VS_SO_GET_SERVICE:
2260 {
2261 struct ip_vs_service_entry *entry;
2262 struct ip_vs_service *svc;
2263
2264 entry = (struct ip_vs_service_entry *)arg;
2265 if (entry->fwmark)
2266 svc = __ip_vs_svc_fwm_get(entry->fwmark);
2267 else
2268 svc = __ip_vs_service_get(entry->protocol,
2269 entry->addr, entry->port);
2270 if (svc) {
2271 ip_vs_copy_service(entry, svc);
2272 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2273 ret = -EFAULT;
2274 ip_vs_service_put(svc);
2275 } else
2276 ret = -ESRCH;
2277 }
2278 break;
2279
2280 case IP_VS_SO_GET_DESTS:
2281 {
2282 struct ip_vs_get_dests *get;
2283 int size;
2284
2285 get = (struct ip_vs_get_dests *)arg;
2286 size = sizeof(*get) +
2287 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2288 if (*len != size) {
2289 IP_VS_ERR("length: %u != %u\n", *len, size);
2290 ret = -EINVAL;
2291 goto out;
2292 }
2293 ret = __ip_vs_get_dest_entries(get, user);
2294 }
2295 break;
2296
2297 case IP_VS_SO_GET_TIMEOUT:
2298 {
2299 struct ip_vs_timeout_user t;
2300
2301 __ip_vs_get_timeouts(&t);
2302 if (copy_to_user(user, &t, sizeof(t)) != 0)
2303 ret = -EFAULT;
2304 }
2305 break;
2306
2307 case IP_VS_SO_GET_DAEMON:
2308 {
2309 struct ip_vs_daemon_user d[2];
2310
2311 memset(&d, 0, sizeof(d));
2312 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2313 d[0].state = IP_VS_STATE_MASTER;
4da62fc7 2314 strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
1da177e4
LT
2315 d[0].syncid = ip_vs_master_syncid;
2316 }
2317 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2318 d[1].state = IP_VS_STATE_BACKUP;
4da62fc7 2319 strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
1da177e4
LT
2320 d[1].syncid = ip_vs_backup_syncid;
2321 }
2322 if (copy_to_user(user, &d, sizeof(d)) != 0)
2323 ret = -EFAULT;
2324 }
2325 break;
2326
2327 default:
2328 ret = -EINVAL;
2329 }
2330
2331 out:
2332 up(&__ip_vs_mutex);
2333 return ret;
2334}
2335
2336
/* Netfilter sockopt registration: routes the IP_VS_SO_SET_* and
 * IP_VS_SO_GET_* ranges on PF_INET sockets to the handlers above. */
static struct nf_sockopt_ops ip_vs_sockopts = {
	.pf		= PF_INET,
	.set_optmin	= IP_VS_BASE_CTL,
	.set_optmax	= IP_VS_SO_SET_MAX+1,
	.set		= do_ip_vs_set_ctl,
	.get_optmin	= IP_VS_BASE_CTL,
	.get_optmax	= IP_VS_SO_GET_MAX+1,
	.get		= do_ip_vs_get_ctl,
};
2346
2347
/*
 * Initialize the IPVS control interface: sockopt handlers, /proc
 * files, sysctl tree, the service/real-server hash tables, global
 * statistics, and the periodic defense timer.  Returns 0 or the
 * sockopt registration error.  Teardown is ip_vs_control_cleanup(),
 * which undoes these steps in reverse order.
 */
int ip_vs_control_init(void)
{
	int ret;
	int idx;

	EnterFunction(2);

	ret = nf_register_sockopt(&ip_vs_sockopts);
	if (ret) {
		IP_VS_ERR("cannot register sockopt.\n");
		return ret;
	}

	proc_net_fops_create("ip_vs", 0, &ip_vs_info_fops);
	proc_net_fops_create("ip_vs_stats",0, &ip_vs_stats_fops);

	sysctl_header = register_sysctl_table(vs_root_table, 0);

	/* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
		INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
		INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
	}
	for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++)  {
		INIT_LIST_HEAD(&ip_vs_rtable[idx]);
	}

	memset(&ip_vs_stats, 0, sizeof(ip_vs_stats));
	spin_lock_init(&ip_vs_stats.lock);
	ip_vs_new_estimator(&ip_vs_stats);

	/* Hook the defense timer */
	schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);

	LeaveFunction(2);
	return 0;
}
2385
2386
/*
 * Tear down the control interface in reverse order of
 * ip_vs_control_init(): stop the defense timer and the global
 * estimator, then unregister the sysctl tree, /proc files and the
 * sockopt handlers.
 */
void ip_vs_control_cleanup(void)
{
	EnterFunction(2);
	ip_vs_trash_cleanup();
	cancel_rearming_delayed_work(&defense_work);
	ip_vs_kill_estimator(&ip_vs_stats);
	unregister_sysctl_table(sysctl_header);
	proc_net_remove("ip_vs_stats");
	proc_net_remove("ip_vs");
	nf_unregister_sockopt(&ip_vs_sockopts);
	LeaveFunction(2);
}