]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/ipvs/ip_vs_ctl.c
Linux-2.6.12-rc2
[net-next-2.6.git] / net / ipv4 / ipvs / ip_vs_ctl.c
CommitLineData
1da177e4
LT
1/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
8 * Version: $Id: ip_vs_ctl.c,v 1.36 2003/06/08 09:31:19 wensong Exp $
9 *
10 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
11 * Peter Kese <peter.kese@ijs.si>
12 * Julian Anastasov <ja@ssi.bg>
13 *
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version
17 * 2 of the License, or (at your option) any later version.
18 *
19 * Changes:
20 *
21 */
22
23#include <linux/module.h>
24#include <linux/init.h>
25#include <linux/types.h>
26#include <linux/fs.h>
27#include <linux/sysctl.h>
28#include <linux/proc_fs.h>
29#include <linux/workqueue.h>
30#include <linux/swap.h>
31#include <linux/proc_fs.h>
32#include <linux/seq_file.h>
33
34#include <linux/netfilter.h>
35#include <linux/netfilter_ipv4.h>
36
37#include <net/ip.h>
38#include <net/sock.h>
39
40#include <asm/uaccess.h>
41
42#include <net/ip_vs.h>
43
44/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
45static DECLARE_MUTEX(__ip_vs_mutex);
46
47/* lock for service table */
48static DEFINE_RWLOCK(__ip_vs_svc_lock);
49
50/* lock for table with the real services */
51static DEFINE_RWLOCK(__ip_vs_rs_lock);
52
53/* lock for state and timeout tables */
54static DEFINE_RWLOCK(__ip_vs_securetcp_lock);
55
56/* lock for drop entry handling */
57static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
58
59/* lock for drop packet handling */
60static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
61
62/* 1/rate drop and drop-entry variables */
63int ip_vs_drop_rate = 0;
64int ip_vs_drop_counter = 0;
65static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
66
67/* number of virtual services */
68static int ip_vs_num_services = 0;
69
70/* sysctl variables */
71static int sysctl_ip_vs_drop_entry = 0;
72static int sysctl_ip_vs_drop_packet = 0;
73static int sysctl_ip_vs_secure_tcp = 0;
74static int sysctl_ip_vs_amemthresh = 1024;
75static int sysctl_ip_vs_am_droprate = 10;
76int sysctl_ip_vs_cache_bypass = 0;
77int sysctl_ip_vs_expire_nodest_conn = 0;
78int sysctl_ip_vs_expire_quiescent_template = 0;
79int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
80int sysctl_ip_vs_nat_icmp_send = 0;
81
82
#ifdef CONFIG_IP_VS_DEBUG
/* Current debug level; only built when CONFIG_IP_VS_DEBUG is set. */
static int sysctl_ip_vs_debug_level = 0;

/*
 *	Return the current value of sysctl_ip_vs_debug_level.
 */
int ip_vs_get_debug_level(void)
{
	const int level = sysctl_ip_vs_debug_level;

	return level;
}
#endif
91
92/*
93 * update_defense_level is called from keventd and from sysctl.
94 */
95static void update_defense_level(void)
96{
97 struct sysinfo i;
98 static int old_secure_tcp = 0;
99 int availmem;
100 int nomem;
101 int to_change = -1;
102
103 /* we only count free and buffered memory (in pages) */
104 si_meminfo(&i);
105 availmem = i.freeram + i.bufferram;
106 /* however in linux 2.5 the i.bufferram is total page cache size,
107 we need adjust it */
108 /* si_swapinfo(&i); */
109 /* availmem = availmem - (i.totalswap - i.freeswap); */
110
111 nomem = (availmem < sysctl_ip_vs_amemthresh);
112
113 /* drop_entry */
114 spin_lock(&__ip_vs_dropentry_lock);
115 switch (sysctl_ip_vs_drop_entry) {
116 case 0:
117 atomic_set(&ip_vs_dropentry, 0);
118 break;
119 case 1:
120 if (nomem) {
121 atomic_set(&ip_vs_dropentry, 1);
122 sysctl_ip_vs_drop_entry = 2;
123 } else {
124 atomic_set(&ip_vs_dropentry, 0);
125 }
126 break;
127 case 2:
128 if (nomem) {
129 atomic_set(&ip_vs_dropentry, 1);
130 } else {
131 atomic_set(&ip_vs_dropentry, 0);
132 sysctl_ip_vs_drop_entry = 1;
133 };
134 break;
135 case 3:
136 atomic_set(&ip_vs_dropentry, 1);
137 break;
138 }
139 spin_unlock(&__ip_vs_dropentry_lock);
140
141 /* drop_packet */
142 spin_lock(&__ip_vs_droppacket_lock);
143 switch (sysctl_ip_vs_drop_packet) {
144 case 0:
145 ip_vs_drop_rate = 0;
146 break;
147 case 1:
148 if (nomem) {
149 ip_vs_drop_rate = ip_vs_drop_counter
150 = sysctl_ip_vs_amemthresh /
151 (sysctl_ip_vs_amemthresh-availmem);
152 sysctl_ip_vs_drop_packet = 2;
153 } else {
154 ip_vs_drop_rate = 0;
155 }
156 break;
157 case 2:
158 if (nomem) {
159 ip_vs_drop_rate = ip_vs_drop_counter
160 = sysctl_ip_vs_amemthresh /
161 (sysctl_ip_vs_amemthresh-availmem);
162 } else {
163 ip_vs_drop_rate = 0;
164 sysctl_ip_vs_drop_packet = 1;
165 }
166 break;
167 case 3:
168 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
169 break;
170 }
171 spin_unlock(&__ip_vs_droppacket_lock);
172
173 /* secure_tcp */
174 write_lock(&__ip_vs_securetcp_lock);
175 switch (sysctl_ip_vs_secure_tcp) {
176 case 0:
177 if (old_secure_tcp >= 2)
178 to_change = 0;
179 break;
180 case 1:
181 if (nomem) {
182 if (old_secure_tcp < 2)
183 to_change = 1;
184 sysctl_ip_vs_secure_tcp = 2;
185 } else {
186 if (old_secure_tcp >= 2)
187 to_change = 0;
188 }
189 break;
190 case 2:
191 if (nomem) {
192 if (old_secure_tcp < 2)
193 to_change = 1;
194 } else {
195 if (old_secure_tcp >= 2)
196 to_change = 0;
197 sysctl_ip_vs_secure_tcp = 1;
198 }
199 break;
200 case 3:
201 if (old_secure_tcp < 2)
202 to_change = 1;
203 break;
204 }
205 old_secure_tcp = sysctl_ip_vs_secure_tcp;
206 if (to_change >= 0)
207 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
208 write_unlock(&__ip_vs_securetcp_lock);
209}
210
211
212/*
213 * Timer for checking the defense
214 */
215#define DEFENSE_TIMER_PERIOD 1*HZ
216static void defense_work_handler(void *data);
217static DECLARE_WORK(defense_work, defense_work_handler, NULL);
218
219static void defense_work_handler(void *data)
220{
221 update_defense_level();
222 if (atomic_read(&ip_vs_dropentry))
223 ip_vs_random_dropentry();
224
225 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
226}
227
228int
229ip_vs_use_count_inc(void)
230{
231 return try_module_get(THIS_MODULE);
232}
233
234void
235ip_vs_use_count_dec(void)
236{
237 module_put(THIS_MODULE);
238}
239
240
241/*
242 * Hash table: for virtual service lookups
243 */
244#define IP_VS_SVC_TAB_BITS 8
245#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
246#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
247
248/* the service table hashed by <protocol, addr, port> */
249static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
250/* the service table hashed by fwmark */
251static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
252
253/*
254 * Hash table: for real service lookups
255 */
256#define IP_VS_RTAB_BITS 4
257#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
258#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
259
260static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
261
262/*
263 * Trash for destinations
264 */
265static LIST_HEAD(ip_vs_dest_trash);
266
267/*
268 * FTP & NULL virtual service counters
269 */
270static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
271static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
272
273
274/*
275 * Returns hash value for virtual service
276 */
277static __inline__ unsigned
278ip_vs_svc_hashkey(unsigned proto, __u32 addr, __u16 port)
279{
280 register unsigned porth = ntohs(port);
281
282 return (proto^ntohl(addr)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
283 & IP_VS_SVC_TAB_MASK;
284}
285
286/*
287 * Returns hash value of fwmark for virtual service lookup
288 */
289static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
290{
291 return fwmark & IP_VS_SVC_TAB_MASK;
292}
293
294/*
295 * Hashes a service in the ip_vs_svc_table by <proto,addr,port>
296 * or in the ip_vs_svc_fwm_table by fwmark.
297 * Should be called with locked tables.
298 */
299static int ip_vs_svc_hash(struct ip_vs_service *svc)
300{
301 unsigned hash;
302
303 if (svc->flags & IP_VS_SVC_F_HASHED) {
304 IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
305 "called from %p\n", __builtin_return_address(0));
306 return 0;
307 }
308
309 if (svc->fwmark == 0) {
310 /*
311 * Hash it by <protocol,addr,port> in ip_vs_svc_table
312 */
313 hash = ip_vs_svc_hashkey(svc->protocol, svc->addr, svc->port);
314 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
315 } else {
316 /*
317 * Hash it by fwmark in ip_vs_svc_fwm_table
318 */
319 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
320 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
321 }
322
323 svc->flags |= IP_VS_SVC_F_HASHED;
324 /* increase its refcnt because it is referenced by the svc table */
325 atomic_inc(&svc->refcnt);
326 return 1;
327}
328
329
330/*
331 * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
332 * Should be called with locked tables.
333 */
334static int ip_vs_svc_unhash(struct ip_vs_service *svc)
335{
336 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
337 IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
338 "called from %p\n", __builtin_return_address(0));
339 return 0;
340 }
341
342 if (svc->fwmark == 0) {
343 /* Remove it from the ip_vs_svc_table table */
344 list_del(&svc->s_list);
345 } else {
346 /* Remove it from the ip_vs_svc_fwm_table table */
347 list_del(&svc->f_list);
348 }
349
350 svc->flags &= ~IP_VS_SVC_F_HASHED;
351 atomic_dec(&svc->refcnt);
352 return 1;
353}
354
355
356/*
357 * Get service by {proto,addr,port} in the service table.
358 */
359static __inline__ struct ip_vs_service *
360__ip_vs_service_get(__u16 protocol, __u32 vaddr, __u16 vport)
361{
362 unsigned hash;
363 struct ip_vs_service *svc;
364
365 /* Check for "full" addressed entries */
366 hash = ip_vs_svc_hashkey(protocol, vaddr, vport);
367
368 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
369 if ((svc->addr == vaddr)
370 && (svc->port == vport)
371 && (svc->protocol == protocol)) {
372 /* HIT */
373 atomic_inc(&svc->usecnt);
374 return svc;
375 }
376 }
377
378 return NULL;
379}
380
381
382/*
383 * Get service by {fwmark} in the service table.
384 */
385static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark)
386{
387 unsigned hash;
388 struct ip_vs_service *svc;
389
390 /* Check for fwmark addressed entries */
391 hash = ip_vs_svc_fwm_hashkey(fwmark);
392
393 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
394 if (svc->fwmark == fwmark) {
395 /* HIT */
396 atomic_inc(&svc->usecnt);
397 return svc;
398 }
399 }
400
401 return NULL;
402}
403
404struct ip_vs_service *
405ip_vs_service_get(__u32 fwmark, __u16 protocol, __u32 vaddr, __u16 vport)
406{
407 struct ip_vs_service *svc;
408
409 read_lock(&__ip_vs_svc_lock);
410
411 /*
412 * Check the table hashed by fwmark first
413 */
414 if (fwmark && (svc = __ip_vs_svc_fwm_get(fwmark)))
415 goto out;
416
417 /*
418 * Check the table hashed by <protocol,addr,port>
419 * for "full" addressed entries
420 */
421 svc = __ip_vs_service_get(protocol, vaddr, vport);
422
423 if (svc == NULL
424 && protocol == IPPROTO_TCP
425 && atomic_read(&ip_vs_ftpsvc_counter)
426 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
427 /*
428 * Check if ftp service entry exists, the packet
429 * might belong to FTP data connections.
430 */
431 svc = __ip_vs_service_get(protocol, vaddr, FTPPORT);
432 }
433
434 if (svc == NULL
435 && atomic_read(&ip_vs_nullsvc_counter)) {
436 /*
437 * Check if the catch-all port (port zero) exists
438 */
439 svc = __ip_vs_service_get(protocol, vaddr, 0);
440 }
441
442 out:
443 read_unlock(&__ip_vs_svc_lock);
444
445 IP_VS_DBG(6, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n",
446 fwmark, ip_vs_proto_name(protocol),
447 NIPQUAD(vaddr), ntohs(vport),
448 svc?"hit":"not hit");
449
450 return svc;
451}
452
453
454static inline void
455__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
456{
457 atomic_inc(&svc->refcnt);
458 dest->svc = svc;
459}
460
461static inline void
462__ip_vs_unbind_svc(struct ip_vs_dest *dest)
463{
464 struct ip_vs_service *svc = dest->svc;
465
466 dest->svc = NULL;
467 if (atomic_dec_and_test(&svc->refcnt))
468 kfree(svc);
469}
470
471
472/*
473 * Returns hash value for real service
474 */
475static __inline__ unsigned ip_vs_rs_hashkey(__u32 addr, __u16 port)
476{
477 register unsigned porth = ntohs(port);
478
479 return (ntohl(addr)^(porth>>IP_VS_RTAB_BITS)^porth)
480 & IP_VS_RTAB_MASK;
481}
482
483/*
484 * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
485 * should be called with locked tables.
486 */
487static int ip_vs_rs_hash(struct ip_vs_dest *dest)
488{
489 unsigned hash;
490
491 if (!list_empty(&dest->d_list)) {
492 return 0;
493 }
494
495 /*
496 * Hash by proto,addr,port,
497 * which are the parameters of the real service.
498 */
499 hash = ip_vs_rs_hashkey(dest->addr, dest->port);
500 list_add(&dest->d_list, &ip_vs_rtable[hash]);
501
502 return 1;
503}
504
505/*
506 * UNhashes ip_vs_dest from ip_vs_rtable.
507 * should be called with locked tables.
508 */
509static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
510{
511 /*
512 * Remove it from the ip_vs_rtable table.
513 */
514 if (!list_empty(&dest->d_list)) {
515 list_del(&dest->d_list);
516 INIT_LIST_HEAD(&dest->d_list);
517 }
518
519 return 1;
520}
521
522/*
523 * Lookup real service by <proto,addr,port> in the real service table.
524 */
525struct ip_vs_dest *
526ip_vs_lookup_real_service(__u16 protocol, __u32 daddr, __u16 dport)
527{
528 unsigned hash;
529 struct ip_vs_dest *dest;
530
531 /*
532 * Check for "full" addressed entries
533 * Return the first found entry
534 */
535 hash = ip_vs_rs_hashkey(daddr, dport);
536
537 read_lock(&__ip_vs_rs_lock);
538 list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
539 if ((dest->addr == daddr)
540 && (dest->port == dport)
541 && ((dest->protocol == protocol) ||
542 dest->vfwmark)) {
543 /* HIT */
544 read_unlock(&__ip_vs_rs_lock);
545 return dest;
546 }
547 }
548 read_unlock(&__ip_vs_rs_lock);
549
550 return NULL;
551}
552
553/*
554 * Lookup destination by {addr,port} in the given service
555 */
556static struct ip_vs_dest *
557ip_vs_lookup_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport)
558{
559 struct ip_vs_dest *dest;
560
561 /*
562 * Find the destination for the given service
563 */
564 list_for_each_entry(dest, &svc->destinations, n_list) {
565 if ((dest->addr == daddr) && (dest->port == dport)) {
566 /* HIT */
567 return dest;
568 }
569 }
570
571 return NULL;
572}
573
574
575/*
576 * Lookup dest by {svc,addr,port} in the destination trash.
577 * The destination trash is used to hold the destinations that are removed
578 * from the service table but are still referenced by some conn entries.
579 * The reason to add the destination trash is when the dest is temporary
580 * down (either by administrator or by monitor program), the dest can be
581 * picked back from the trash, the remaining connections to the dest can
582 * continue, and the counting information of the dest is also useful for
583 * scheduling.
584 */
585static struct ip_vs_dest *
586ip_vs_trash_get_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport)
587{
588 struct ip_vs_dest *dest, *nxt;
589
590 /*
591 * Find the destination in trash
592 */
593 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
594 IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, "
595 "refcnt=%d\n",
596 dest->vfwmark,
597 NIPQUAD(dest->addr), ntohs(dest->port),
598 atomic_read(&dest->refcnt));
599 if (dest->addr == daddr &&
600 dest->port == dport &&
601 dest->vfwmark == svc->fwmark &&
602 dest->protocol == svc->protocol &&
603 (svc->fwmark ||
604 (dest->vaddr == svc->addr &&
605 dest->vport == svc->port))) {
606 /* HIT */
607 return dest;
608 }
609
610 /*
611 * Try to purge the destination from trash if not referenced
612 */
613 if (atomic_read(&dest->refcnt) == 1) {
614 IP_VS_DBG(3, "Removing destination %u/%u.%u.%u.%u:%u "
615 "from trash\n",
616 dest->vfwmark,
617 NIPQUAD(dest->addr), ntohs(dest->port));
618 list_del(&dest->n_list);
619 ip_vs_dst_reset(dest);
620 __ip_vs_unbind_svc(dest);
621 kfree(dest);
622 }
623 }
624
625 return NULL;
626}
627
628
629/*
630 * Clean up all the destinations in the trash
631 * Called by the ip_vs_control_cleanup()
632 *
633 * When the ip_vs_control_clearup is activated by ipvs module exit,
634 * the service tables must have been flushed and all the connections
635 * are expired, and the refcnt of each destination in the trash must
636 * be 1, so we simply release them here.
637 */
638static void ip_vs_trash_cleanup(void)
639{
640 struct ip_vs_dest *dest, *nxt;
641
642 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
643 list_del(&dest->n_list);
644 ip_vs_dst_reset(dest);
645 __ip_vs_unbind_svc(dest);
646 kfree(dest);
647 }
648}
649
650
651static void
652ip_vs_zero_stats(struct ip_vs_stats *stats)
653{
654 spin_lock_bh(&stats->lock);
655 memset(stats, 0, (char *)&stats->lock - (char *)stats);
656 spin_unlock_bh(&stats->lock);
657 ip_vs_zero_estimator(stats);
658}
659
660/*
661 * Update a destination in the given service
662 */
663static void
664__ip_vs_update_dest(struct ip_vs_service *svc,
665 struct ip_vs_dest *dest, struct ip_vs_dest_user *udest)
666{
667 int conn_flags;
668
669 /* set the weight and the flags */
670 atomic_set(&dest->weight, udest->weight);
671 conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
672
673 /* check if local node and update the flags */
674 if (inet_addr_type(udest->addr) == RTN_LOCAL) {
675 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
676 | IP_VS_CONN_F_LOCALNODE;
677 }
678
679 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
680 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
681 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
682 } else {
683 /*
684 * Put the real service in ip_vs_rtable if not present.
685 * For now only for NAT!
686 */
687 write_lock_bh(&__ip_vs_rs_lock);
688 ip_vs_rs_hash(dest);
689 write_unlock_bh(&__ip_vs_rs_lock);
690 }
691 atomic_set(&dest->conn_flags, conn_flags);
692
693 /* bind the service */
694 if (!dest->svc) {
695 __ip_vs_bind_svc(dest, svc);
696 } else {
697 if (dest->svc != svc) {
698 __ip_vs_unbind_svc(dest);
699 ip_vs_zero_stats(&dest->stats);
700 __ip_vs_bind_svc(dest, svc);
701 }
702 }
703
704 /* set the dest status flags */
705 dest->flags |= IP_VS_DEST_F_AVAILABLE;
706
707 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
708 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
709 dest->u_threshold = udest->u_threshold;
710 dest->l_threshold = udest->l_threshold;
711}
712
713
714/*
715 * Create a destination for the given service
716 */
717static int
718ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest,
719 struct ip_vs_dest **dest_p)
720{
721 struct ip_vs_dest *dest;
722 unsigned atype;
723
724 EnterFunction(2);
725
726 atype = inet_addr_type(udest->addr);
727 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
728 return -EINVAL;
729
730 dest = kmalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
731 if (dest == NULL) {
732 IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
733 return -ENOMEM;
734 }
735 memset(dest, 0, sizeof(struct ip_vs_dest));
736
737 dest->protocol = svc->protocol;
738 dest->vaddr = svc->addr;
739 dest->vport = svc->port;
740 dest->vfwmark = svc->fwmark;
741 dest->addr = udest->addr;
742 dest->port = udest->port;
743
744 atomic_set(&dest->activeconns, 0);
745 atomic_set(&dest->inactconns, 0);
746 atomic_set(&dest->persistconns, 0);
747 atomic_set(&dest->refcnt, 0);
748
749 INIT_LIST_HEAD(&dest->d_list);
750 spin_lock_init(&dest->dst_lock);
751 spin_lock_init(&dest->stats.lock);
752 __ip_vs_update_dest(svc, dest, udest);
753 ip_vs_new_estimator(&dest->stats);
754
755 *dest_p = dest;
756
757 LeaveFunction(2);
758 return 0;
759}
760
761
762/*
763 * Add a destination into an existing service
764 */
765static int
766ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
767{
768 struct ip_vs_dest *dest;
769 __u32 daddr = udest->addr;
770 __u16 dport = udest->port;
771 int ret;
772
773 EnterFunction(2);
774
775 if (udest->weight < 0) {
776 IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
777 return -ERANGE;
778 }
779
780 if (udest->l_threshold > udest->u_threshold) {
781 IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than "
782 "upper threshold\n");
783 return -ERANGE;
784 }
785
786 /*
787 * Check if the dest already exists in the list
788 */
789 dest = ip_vs_lookup_dest(svc, daddr, dport);
790 if (dest != NULL) {
791 IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
792 return -EEXIST;
793 }
794
795 /*
796 * Check if the dest already exists in the trash and
797 * is from the same service
798 */
799 dest = ip_vs_trash_get_dest(svc, daddr, dport);
800 if (dest != NULL) {
801 IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
802 "refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
803 NIPQUAD(daddr), ntohs(dport),
804 atomic_read(&dest->refcnt),
805 dest->vfwmark,
806 NIPQUAD(dest->vaddr),
807 ntohs(dest->vport));
808 __ip_vs_update_dest(svc, dest, udest);
809
810 /*
811 * Get the destination from the trash
812 */
813 list_del(&dest->n_list);
814
815 ip_vs_new_estimator(&dest->stats);
816
817 write_lock_bh(&__ip_vs_svc_lock);
818
819 /*
820 * Wait until all other svc users go away.
821 */
822 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
823
824 list_add(&dest->n_list, &svc->destinations);
825 svc->num_dests++;
826
827 /* call the update_service function of its scheduler */
828 svc->scheduler->update_service(svc);
829
830 write_unlock_bh(&__ip_vs_svc_lock);
831 return 0;
832 }
833
834 /*
835 * Allocate and initialize the dest structure
836 */
837 ret = ip_vs_new_dest(svc, udest, &dest);
838 if (ret) {
839 return ret;
840 }
841
842 /*
843 * Add the dest entry into the list
844 */
845 atomic_inc(&dest->refcnt);
846
847 write_lock_bh(&__ip_vs_svc_lock);
848
849 /*
850 * Wait until all other svc users go away.
851 */
852 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
853
854 list_add(&dest->n_list, &svc->destinations);
855 svc->num_dests++;
856
857 /* call the update_service function of its scheduler */
858 svc->scheduler->update_service(svc);
859
860 write_unlock_bh(&__ip_vs_svc_lock);
861
862 LeaveFunction(2);
863
864 return 0;
865}
866
867
868/*
869 * Edit a destination in the given service
870 */
871static int
872ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
873{
874 struct ip_vs_dest *dest;
875 __u32 daddr = udest->addr;
876 __u16 dport = udest->port;
877
878 EnterFunction(2);
879
880 if (udest->weight < 0) {
881 IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
882 return -ERANGE;
883 }
884
885 if (udest->l_threshold > udest->u_threshold) {
886 IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than "
887 "upper threshold\n");
888 return -ERANGE;
889 }
890
891 /*
892 * Lookup the destination list
893 */
894 dest = ip_vs_lookup_dest(svc, daddr, dport);
895 if (dest == NULL) {
896 IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
897 return -ENOENT;
898 }
899
900 __ip_vs_update_dest(svc, dest, udest);
901
902 write_lock_bh(&__ip_vs_svc_lock);
903
904 /* Wait until all other svc users go away */
905 while (atomic_read(&svc->usecnt) > 1) {};
906
907 /* call the update_service, because server weight may be changed */
908 svc->scheduler->update_service(svc);
909
910 write_unlock_bh(&__ip_vs_svc_lock);
911
912 LeaveFunction(2);
913
914 return 0;
915}
916
917
918/*
919 * Delete a destination (must be already unlinked from the service)
920 */
921static void __ip_vs_del_dest(struct ip_vs_dest *dest)
922{
923 ip_vs_kill_estimator(&dest->stats);
924
925 /*
926 * Remove it from the d-linked list with the real services.
927 */
928 write_lock_bh(&__ip_vs_rs_lock);
929 ip_vs_rs_unhash(dest);
930 write_unlock_bh(&__ip_vs_rs_lock);
931
932 /*
933 * Decrease the refcnt of the dest, and free the dest
934 * if nobody refers to it (refcnt=0). Otherwise, throw
935 * the destination into the trash.
936 */
937 if (atomic_dec_and_test(&dest->refcnt)) {
938 ip_vs_dst_reset(dest);
939 /* simply decrease svc->refcnt here, let the caller check
940 and release the service if nobody refers to it.
941 Only user context can release destination and service,
942 and only one user context can update virtual service at a
943 time, so the operation here is OK */
944 atomic_dec(&dest->svc->refcnt);
945 kfree(dest);
946 } else {
947 IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, refcnt=%d\n",
948 NIPQUAD(dest->addr), ntohs(dest->port),
949 atomic_read(&dest->refcnt));
950 list_add(&dest->n_list, &ip_vs_dest_trash);
951 atomic_inc(&dest->refcnt);
952 }
953}
954
955
956/*
957 * Unlink a destination from the given service
958 */
959static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
960 struct ip_vs_dest *dest,
961 int svcupd)
962{
963 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
964
965 /*
966 * Remove it from the d-linked destination list.
967 */
968 list_del(&dest->n_list);
969 svc->num_dests--;
970 if (svcupd) {
971 /*
972 * Call the update_service function of its scheduler
973 */
974 svc->scheduler->update_service(svc);
975 }
976}
977
978
979/*
980 * Delete a destination server in the given service
981 */
982static int
983ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_dest_user *udest)
984{
985 struct ip_vs_dest *dest;
986 __u32 daddr = udest->addr;
987 __u16 dport = udest->port;
988
989 EnterFunction(2);
990
991 dest = ip_vs_lookup_dest(svc, daddr, dport);
992 if (dest == NULL) {
993 IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
994 return -ENOENT;
995 }
996
997 write_lock_bh(&__ip_vs_svc_lock);
998
999 /*
1000 * Wait until all other svc users go away.
1001 */
1002 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1003
1004 /*
1005 * Unlink dest from the service
1006 */
1007 __ip_vs_unlink_dest(svc, dest, 1);
1008
1009 write_unlock_bh(&__ip_vs_svc_lock);
1010
1011 /*
1012 * Delete the destination
1013 */
1014 __ip_vs_del_dest(dest);
1015
1016 LeaveFunction(2);
1017
1018 return 0;
1019}
1020
1021
1022/*
1023 * Add a service into the service hash table
1024 */
1025static int
1026ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p)
1027{
1028 int ret = 0;
1029 struct ip_vs_scheduler *sched = NULL;
1030 struct ip_vs_service *svc = NULL;
1031
1032 /* increase the module use count */
1033 ip_vs_use_count_inc();
1034
1035 /* Lookup the scheduler by 'u->sched_name' */
1036 sched = ip_vs_scheduler_get(u->sched_name);
1037 if (sched == NULL) {
1038 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1039 u->sched_name);
1040 ret = -ENOENT;
1041 goto out_mod_dec;
1042 }
1043
1044 svc = (struct ip_vs_service *)
1045 kmalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
1046 if (svc == NULL) {
1047 IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
1048 ret = -ENOMEM;
1049 goto out_err;
1050 }
1051 memset(svc, 0, sizeof(struct ip_vs_service));
1052
1053 /* I'm the first user of the service */
1054 atomic_set(&svc->usecnt, 1);
1055 atomic_set(&svc->refcnt, 0);
1056
1057 svc->protocol = u->protocol;
1058 svc->addr = u->addr;
1059 svc->port = u->port;
1060 svc->fwmark = u->fwmark;
1061 svc->flags = u->flags;
1062 svc->timeout = u->timeout * HZ;
1063 svc->netmask = u->netmask;
1064
1065 INIT_LIST_HEAD(&svc->destinations);
1066 rwlock_init(&svc->sched_lock);
1067 spin_lock_init(&svc->stats.lock);
1068
1069 /* Bind the scheduler */
1070 ret = ip_vs_bind_scheduler(svc, sched);
1071 if (ret)
1072 goto out_err;
1073 sched = NULL;
1074
1075 /* Update the virtual service counters */
1076 if (svc->port == FTPPORT)
1077 atomic_inc(&ip_vs_ftpsvc_counter);
1078 else if (svc->port == 0)
1079 atomic_inc(&ip_vs_nullsvc_counter);
1080
1081 ip_vs_new_estimator(&svc->stats);
1082 ip_vs_num_services++;
1083
1084 /* Hash the service into the service table */
1085 write_lock_bh(&__ip_vs_svc_lock);
1086 ip_vs_svc_hash(svc);
1087 write_unlock_bh(&__ip_vs_svc_lock);
1088
1089 *svc_p = svc;
1090 return 0;
1091
1092 out_err:
1093 if (svc != NULL) {
1094 if (svc->scheduler)
1095 ip_vs_unbind_scheduler(svc);
1096 if (svc->inc) {
1097 local_bh_disable();
1098 ip_vs_app_inc_put(svc->inc);
1099 local_bh_enable();
1100 }
1101 kfree(svc);
1102 }
1103 ip_vs_scheduler_put(sched);
1104
1105 out_mod_dec:
1106 /* decrease the module use count */
1107 ip_vs_use_count_dec();
1108
1109 return ret;
1110}
1111
1112
1113/*
1114 * Edit a service and bind it with a new scheduler
1115 */
1116static int
1117ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user *u)
1118{
1119 struct ip_vs_scheduler *sched, *old_sched;
1120 int ret = 0;
1121
1122 /*
1123 * Lookup the scheduler, by 'u->sched_name'
1124 */
1125 sched = ip_vs_scheduler_get(u->sched_name);
1126 if (sched == NULL) {
1127 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1128 u->sched_name);
1129 return -ENOENT;
1130 }
1131 old_sched = sched;
1132
1133 write_lock_bh(&__ip_vs_svc_lock);
1134
1135 /*
1136 * Wait until all other svc users go away.
1137 */
1138 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1139
1140 /*
1141 * Set the flags and timeout value
1142 */
1143 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1144 svc->timeout = u->timeout * HZ;
1145 svc->netmask = u->netmask;
1146
1147 old_sched = svc->scheduler;
1148 if (sched != old_sched) {
1149 /*
1150 * Unbind the old scheduler
1151 */
1152 if ((ret = ip_vs_unbind_scheduler(svc))) {
1153 old_sched = sched;
1154 goto out;
1155 }
1156
1157 /*
1158 * Bind the new scheduler
1159 */
1160 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1161 /*
1162 * If ip_vs_bind_scheduler fails, restore the old
1163 * scheduler.
1164 * The main reason of failure is out of memory.
1165 *
1166 * The question is if the old scheduler can be
1167 * restored all the time. TODO: if it cannot be
1168 * restored some time, we must delete the service,
1169 * otherwise the system may crash.
1170 */
1171 ip_vs_bind_scheduler(svc, old_sched);
1172 old_sched = sched;
1173 goto out;
1174 }
1175 }
1176
1177 out:
1178 write_unlock_bh(&__ip_vs_svc_lock);
1179
1180 if (old_sched)
1181 ip_vs_scheduler_put(old_sched);
1182
1183 return ret;
1184}
1185
1186
1187/*
1188 * Delete a service from the service list
1189 * - The service must be unlinked, unlocked and not referenced!
1190 * - We are called under _bh lock
1191 */
1192static void __ip_vs_del_service(struct ip_vs_service *svc)
1193{
1194 struct ip_vs_dest *dest, *nxt;
1195 struct ip_vs_scheduler *old_sched;
1196
1197 ip_vs_num_services--;
1198 ip_vs_kill_estimator(&svc->stats);
1199
1200 /* Unbind scheduler */
1201 old_sched = svc->scheduler;
1202 ip_vs_unbind_scheduler(svc);
1203 if (old_sched)
1204 ip_vs_scheduler_put(old_sched);
1205
1206 /* Unbind app inc */
1207 if (svc->inc) {
1208 ip_vs_app_inc_put(svc->inc);
1209 svc->inc = NULL;
1210 }
1211
1212 /*
1213 * Unlink the whole destination list
1214 */
1215 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1216 __ip_vs_unlink_dest(svc, dest, 0);
1217 __ip_vs_del_dest(dest);
1218 }
1219
1220 /*
1221 * Update the virtual service counters
1222 */
1223 if (svc->port == FTPPORT)
1224 atomic_dec(&ip_vs_ftpsvc_counter);
1225 else if (svc->port == 0)
1226 atomic_dec(&ip_vs_nullsvc_counter);
1227
1228 /*
1229 * Free the service if nobody refers to it
1230 */
1231 if (atomic_read(&svc->refcnt) == 0)
1232 kfree(svc);
1233
1234 /* decrease the module use count */
1235 ip_vs_use_count_dec();
1236}
1237
1238/*
1239 * Delete a service from the service list
1240 */
1241static int ip_vs_del_service(struct ip_vs_service *svc)
1242{
1243 if (svc == NULL)
1244 return -EEXIST;
1245
1246 /*
1247 * Unhash it from the service table
1248 */
1249 write_lock_bh(&__ip_vs_svc_lock);
1250
1251 ip_vs_svc_unhash(svc);
1252
1253 /*
1254 * Wait until all the svc users go away.
1255 */
1256 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1257
1258 __ip_vs_del_service(svc);
1259
1260 write_unlock_bh(&__ip_vs_svc_lock);
1261
1262 return 0;
1263}
1264
1265
1266/*
1267 * Flush all the virtual services
1268 */
1269static int ip_vs_flush(void)
1270{
1271 int idx;
1272 struct ip_vs_service *svc, *nxt;
1273
1274 /*
1275 * Flush the service table hashed by <protocol,addr,port>
1276 */
1277 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1278 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1279 write_lock_bh(&__ip_vs_svc_lock);
1280 ip_vs_svc_unhash(svc);
1281 /*
1282 * Wait until all the svc users go away.
1283 */
1284 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1285 __ip_vs_del_service(svc);
1286 write_unlock_bh(&__ip_vs_svc_lock);
1287 }
1288 }
1289
1290 /*
1291 * Flush the service table hashed by fwmark
1292 */
1293 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1294 list_for_each_entry_safe(svc, nxt,
1295 &ip_vs_svc_fwm_table[idx], f_list) {
1296 write_lock_bh(&__ip_vs_svc_lock);
1297 ip_vs_svc_unhash(svc);
1298 /*
1299 * Wait until all the svc users go away.
1300 */
1301 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1302 __ip_vs_del_service(svc);
1303 write_unlock_bh(&__ip_vs_svc_lock);
1304 }
1305 }
1306
1307 return 0;
1308}
1309
1310
1311/*
1312 * Zero counters in a service or all services
1313 */
1314static int ip_vs_zero_service(struct ip_vs_service *svc)
1315{
1316 struct ip_vs_dest *dest;
1317
1318 write_lock_bh(&__ip_vs_svc_lock);
1319 list_for_each_entry(dest, &svc->destinations, n_list) {
1320 ip_vs_zero_stats(&dest->stats);
1321 }
1322 ip_vs_zero_stats(&svc->stats);
1323 write_unlock_bh(&__ip_vs_svc_lock);
1324 return 0;
1325}
1326
1327static int ip_vs_zero_all(void)
1328{
1329 int idx;
1330 struct ip_vs_service *svc;
1331
1332 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1333 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1334 ip_vs_zero_service(svc);
1335 }
1336 }
1337
1338 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1339 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1340 ip_vs_zero_service(svc);
1341 }
1342 }
1343
1344 ip_vs_zero_stats(&ip_vs_stats);
1345 return 0;
1346}
1347
1348
1349static int
1350proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
1351 void __user *buffer, size_t *lenp, loff_t *ppos)
1352{
1353 int *valp = table->data;
1354 int val = *valp;
1355 int rc;
1356
1357 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1358 if (write && (*valp != val)) {
1359 if ((*valp < 0) || (*valp > 3)) {
1360 /* Restore the correct value */
1361 *valp = val;
1362 } else {
1363 local_bh_disable();
1364 update_defense_level();
1365 local_bh_enable();
1366 }
1367 }
1368 return rc;
1369}
1370
1371
1372static int
1373proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
1374 void __user *buffer, size_t *lenp, loff_t *ppos)
1375{
1376 int *valp = table->data;
1377 int val[2];
1378 int rc;
1379
1380 /* backup the value first */
1381 memcpy(val, valp, sizeof(val));
1382
1383 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1384 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1385 /* Restore the correct value */
1386 memcpy(valp, val, sizeof(val));
1387 }
1388 return rc;
1389}
1390
1391
1392/*
1393 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1394 */
1395
1396static struct ctl_table vs_vars[] = {
1397 {
1398 .ctl_name = NET_IPV4_VS_AMEMTHRESH,
1399 .procname = "amemthresh",
1400 .data = &sysctl_ip_vs_amemthresh,
1401 .maxlen = sizeof(int),
1402 .mode = 0644,
1403 .proc_handler = &proc_dointvec,
1404 },
1405#ifdef CONFIG_IP_VS_DEBUG
1406 {
1407 .ctl_name = NET_IPV4_VS_DEBUG_LEVEL,
1408 .procname = "debug_level",
1409 .data = &sysctl_ip_vs_debug_level,
1410 .maxlen = sizeof(int),
1411 .mode = 0644,
1412 .proc_handler = &proc_dointvec,
1413 },
1414#endif
1415 {
1416 .ctl_name = NET_IPV4_VS_AMDROPRATE,
1417 .procname = "am_droprate",
1418 .data = &sysctl_ip_vs_am_droprate,
1419 .maxlen = sizeof(int),
1420 .mode = 0644,
1421 .proc_handler = &proc_dointvec,
1422 },
1423 {
1424 .ctl_name = NET_IPV4_VS_DROP_ENTRY,
1425 .procname = "drop_entry",
1426 .data = &sysctl_ip_vs_drop_entry,
1427 .maxlen = sizeof(int),
1428 .mode = 0644,
1429 .proc_handler = &proc_do_defense_mode,
1430 },
1431 {
1432 .ctl_name = NET_IPV4_VS_DROP_PACKET,
1433 .procname = "drop_packet",
1434 .data = &sysctl_ip_vs_drop_packet,
1435 .maxlen = sizeof(int),
1436 .mode = 0644,
1437 .proc_handler = &proc_do_defense_mode,
1438 },
1439 {
1440 .ctl_name = NET_IPV4_VS_SECURE_TCP,
1441 .procname = "secure_tcp",
1442 .data = &sysctl_ip_vs_secure_tcp,
1443 .maxlen = sizeof(int),
1444 .mode = 0644,
1445 .proc_handler = &proc_do_defense_mode,
1446 },
1447#if 0
1448 {
1449 .ctl_name = NET_IPV4_VS_TO_ES,
1450 .procname = "timeout_established",
1451 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1452 .maxlen = sizeof(int),
1453 .mode = 0644,
1454 .proc_handler = &proc_dointvec_jiffies,
1455 },
1456 {
1457 .ctl_name = NET_IPV4_VS_TO_SS,
1458 .procname = "timeout_synsent",
1459 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1460 .maxlen = sizeof(int),
1461 .mode = 0644,
1462 .proc_handler = &proc_dointvec_jiffies,
1463 },
1464 {
1465 .ctl_name = NET_IPV4_VS_TO_SR,
1466 .procname = "timeout_synrecv",
1467 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1468 .maxlen = sizeof(int),
1469 .mode = 0644,
1470 .proc_handler = &proc_dointvec_jiffies,
1471 },
1472 {
1473 .ctl_name = NET_IPV4_VS_TO_FW,
1474 .procname = "timeout_finwait",
1475 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1476 .maxlen = sizeof(int),
1477 .mode = 0644,
1478 .proc_handler = &proc_dointvec_jiffies,
1479 },
1480 {
1481 .ctl_name = NET_IPV4_VS_TO_TW,
1482 .procname = "timeout_timewait",
1483 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1484 .maxlen = sizeof(int),
1485 .mode = 0644,
1486 .proc_handler = &proc_dointvec_jiffies,
1487 },
1488 {
1489 .ctl_name = NET_IPV4_VS_TO_CL,
1490 .procname = "timeout_close",
1491 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1492 .maxlen = sizeof(int),
1493 .mode = 0644,
1494 .proc_handler = &proc_dointvec_jiffies,
1495 },
1496 {
1497 .ctl_name = NET_IPV4_VS_TO_CW,
1498 .procname = "timeout_closewait",
1499 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1500 .maxlen = sizeof(int),
1501 .mode = 0644,
1502 .proc_handler = &proc_dointvec_jiffies,
1503 },
1504 {
1505 .ctl_name = NET_IPV4_VS_TO_LA,
1506 .procname = "timeout_lastack",
1507 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1508 .maxlen = sizeof(int),
1509 .mode = 0644,
1510 .proc_handler = &proc_dointvec_jiffies,
1511 },
1512 {
1513 .ctl_name = NET_IPV4_VS_TO_LI,
1514 .procname = "timeout_listen",
1515 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1516 .maxlen = sizeof(int),
1517 .mode = 0644,
1518 .proc_handler = &proc_dointvec_jiffies,
1519 },
1520 {
1521 .ctl_name = NET_IPV4_VS_TO_SA,
1522 .procname = "timeout_synack",
1523 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1524 .maxlen = sizeof(int),
1525 .mode = 0644,
1526 .proc_handler = &proc_dointvec_jiffies,
1527 },
1528 {
1529 .ctl_name = NET_IPV4_VS_TO_UDP,
1530 .procname = "timeout_udp",
1531 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1532 .maxlen = sizeof(int),
1533 .mode = 0644,
1534 .proc_handler = &proc_dointvec_jiffies,
1535 },
1536 {
1537 .ctl_name = NET_IPV4_VS_TO_ICMP,
1538 .procname = "timeout_icmp",
1539 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1540 .maxlen = sizeof(int),
1541 .mode = 0644,
1542 .proc_handler = &proc_dointvec_jiffies,
1543 },
1544#endif
1545 {
1546 .ctl_name = NET_IPV4_VS_CACHE_BYPASS,
1547 .procname = "cache_bypass",
1548 .data = &sysctl_ip_vs_cache_bypass,
1549 .maxlen = sizeof(int),
1550 .mode = 0644,
1551 .proc_handler = &proc_dointvec,
1552 },
1553 {
1554 .ctl_name = NET_IPV4_VS_EXPIRE_NODEST_CONN,
1555 .procname = "expire_nodest_conn",
1556 .data = &sysctl_ip_vs_expire_nodest_conn,
1557 .maxlen = sizeof(int),
1558 .mode = 0644,
1559 .proc_handler = &proc_dointvec,
1560 },
1561 {
1562 .ctl_name = NET_IPV4_VS_EXPIRE_QUIESCENT_TEMPLATE,
1563 .procname = "expire_quiescent_template",
1564 .data = &sysctl_ip_vs_expire_quiescent_template,
1565 .maxlen = sizeof(int),
1566 .mode = 0644,
1567 .proc_handler = &proc_dointvec,
1568 },
1569 {
1570 .ctl_name = NET_IPV4_VS_SYNC_THRESHOLD,
1571 .procname = "sync_threshold",
1572 .data = &sysctl_ip_vs_sync_threshold,
1573 .maxlen = sizeof(sysctl_ip_vs_sync_threshold),
1574 .mode = 0644,
1575 .proc_handler = &proc_do_sync_threshold,
1576 },
1577 {
1578 .ctl_name = NET_IPV4_VS_NAT_ICMP_SEND,
1579 .procname = "nat_icmp_send",
1580 .data = &sysctl_ip_vs_nat_icmp_send,
1581 .maxlen = sizeof(int),
1582 .mode = 0644,
1583 .proc_handler = &proc_dointvec,
1584 },
1585 { .ctl_name = 0 }
1586};
1587
1588static ctl_table vs_table[] = {
1589 {
1590 .ctl_name = NET_IPV4_VS,
1591 .procname = "vs",
1592 .mode = 0555,
1593 .child = vs_vars
1594 },
1595 { .ctl_name = 0 }
1596};
1597
1598static ctl_table ipv4_table[] = {
1599 {
1600 .ctl_name = NET_IPV4,
1601 .procname = "ipv4",
1602 .mode = 0555,
1603 .child = vs_table,
1604 },
1605 { .ctl_name = 0 }
1606};
1607
1608static ctl_table vs_root_table[] = {
1609 {
1610 .ctl_name = CTL_NET,
1611 .procname = "net",
1612 .mode = 0555,
1613 .child = ipv4_table,
1614 },
1615 { .ctl_name = 0 }
1616};
1617
1618static struct ctl_table_header * sysctl_header;
1619
1620#ifdef CONFIG_PROC_FS
1621
/* Per-open iteration state for the "ip_vs" seq_file */
struct ip_vs_iter {
	struct list_head	*table;		/* ip_vs_svc_table or ip_vs_svc_fwm_table */
	int			bucket;		/* bucket index of the current service */
};
1626
1627/*
1628 * Write the contents of the VS rule table to a PROCfs file.
1629 * (It is kept just for backward compatibility)
1630 */
1631static inline const char *ip_vs_fwd_name(unsigned flags)
1632{
1633 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1634 case IP_VS_CONN_F_LOCALNODE:
1635 return "Local";
1636 case IP_VS_CONN_F_TUNNEL:
1637 return "Tunnel";
1638 case IP_VS_CONN_F_DROUTE:
1639 return "Route";
1640 default:
1641 return "Masq";
1642 }
1643}
1644
1645
1646/* Get the Nth entry in the two lists */
1647static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1648{
1649 struct ip_vs_iter *iter = seq->private;
1650 int idx;
1651 struct ip_vs_service *svc;
1652
1653 /* look in hash by protocol */
1654 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1655 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1656 if (pos-- == 0){
1657 iter->table = ip_vs_svc_table;
1658 iter->bucket = idx;
1659 return svc;
1660 }
1661 }
1662 }
1663
1664 /* keep looking in fwmark */
1665 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1666 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1667 if (pos-- == 0) {
1668 iter->table = ip_vs_svc_fwm_table;
1669 iter->bucket = idx;
1670 return svc;
1671 }
1672 }
1673 }
1674
1675 return NULL;
1676}
1677
1678static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1679{
1680
1681 read_lock_bh(&__ip_vs_svc_lock);
1682 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1683}
1684
1685
1686static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1687{
1688 struct list_head *e;
1689 struct ip_vs_iter *iter;
1690 struct ip_vs_service *svc;
1691
1692 ++*pos;
1693 if (v == SEQ_START_TOKEN)
1694 return ip_vs_info_array(seq,0);
1695
1696 svc = v;
1697 iter = seq->private;
1698
1699 if (iter->table == ip_vs_svc_table) {
1700 /* next service in table hashed by protocol */
1701 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1702 return list_entry(e, struct ip_vs_service, s_list);
1703
1704
1705 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1706 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1707 s_list) {
1708 return svc;
1709 }
1710 }
1711
1712 iter->table = ip_vs_svc_fwm_table;
1713 iter->bucket = -1;
1714 goto scan_fwmark;
1715 }
1716
1717 /* next service in hashed by fwmark */
1718 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1719 return list_entry(e, struct ip_vs_service, f_list);
1720
1721 scan_fwmark:
1722 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1723 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1724 f_list)
1725 return svc;
1726 }
1727
1728 return NULL;
1729}
1730
1731static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1732{
1733 read_unlock_bh(&__ip_vs_svc_lock);
1734}
1735
1736
1737static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1738{
1739 if (v == SEQ_START_TOKEN) {
1740 seq_printf(seq,
1741 "IP Virtual Server version %d.%d.%d (size=%d)\n",
1742 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
1743 seq_puts(seq,
1744 "Prot LocalAddress:Port Scheduler Flags\n");
1745 seq_puts(seq,
1746 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1747 } else {
1748 const struct ip_vs_service *svc = v;
1749 const struct ip_vs_iter *iter = seq->private;
1750 const struct ip_vs_dest *dest;
1751
1752 if (iter->table == ip_vs_svc_table)
1753 seq_printf(seq, "%s %08X:%04X %s ",
1754 ip_vs_proto_name(svc->protocol),
1755 ntohl(svc->addr),
1756 ntohs(svc->port),
1757 svc->scheduler->name);
1758 else
1759 seq_printf(seq, "FWM %08X %s ",
1760 svc->fwmark, svc->scheduler->name);
1761
1762 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1763 seq_printf(seq, "persistent %d %08X\n",
1764 svc->timeout,
1765 ntohl(svc->netmask));
1766 else
1767 seq_putc(seq, '\n');
1768
1769 list_for_each_entry(dest, &svc->destinations, n_list) {
1770 seq_printf(seq,
1771 " -> %08X:%04X %-7s %-6d %-10d %-10d\n",
1772 ntohl(dest->addr), ntohs(dest->port),
1773 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1774 atomic_read(&dest->weight),
1775 atomic_read(&dest->activeconns),
1776 atomic_read(&dest->inactconns));
1777 }
1778 }
1779 return 0;
1780}
1781
1782static struct seq_operations ip_vs_info_seq_ops = {
1783 .start = ip_vs_info_seq_start,
1784 .next = ip_vs_info_seq_next,
1785 .stop = ip_vs_info_seq_stop,
1786 .show = ip_vs_info_seq_show,
1787};
1788
1789static int ip_vs_info_open(struct inode *inode, struct file *file)
1790{
1791 struct seq_file *seq;
1792 int rc = -ENOMEM;
1793 struct ip_vs_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
1794
1795 if (!s)
1796 goto out;
1797
1798 rc = seq_open(file, &ip_vs_info_seq_ops);
1799 if (rc)
1800 goto out_kfree;
1801
1802 seq = file->private_data;
1803 seq->private = s;
1804 memset(s, 0, sizeof(*s));
1805out:
1806 return rc;
1807out_kfree:
1808 kfree(s);
1809 goto out;
1810}
1811
1812static struct file_operations ip_vs_info_fops = {
1813 .owner = THIS_MODULE,
1814 .open = ip_vs_info_open,
1815 .read = seq_read,
1816 .llseek = seq_lseek,
1817 .release = seq_release_private,
1818};
1819
1820#endif
1821
/*
 * Global IPVS statistics.  Zeroed and given an estimator in
 * ip_vs_control_init(); printed by ip_vs_stats_show() under its lock.
 */
struct ip_vs_stats ip_vs_stats;
1823
1824#ifdef CONFIG_PROC_FS
1825static int ip_vs_stats_show(struct seq_file *seq, void *v)
1826{
1827
1828/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1829 seq_puts(seq,
1830 " Total Incoming Outgoing Incoming Outgoing\n");
1831 seq_printf(seq,
1832 " Conns Packets Packets Bytes Bytes\n");
1833
1834 spin_lock_bh(&ip_vs_stats.lock);
1835 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.conns,
1836 ip_vs_stats.inpkts, ip_vs_stats.outpkts,
1837 (unsigned long long) ip_vs_stats.inbytes,
1838 (unsigned long long) ip_vs_stats.outbytes);
1839
1840/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1841 seq_puts(seq,
1842 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1843 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
1844 ip_vs_stats.cps,
1845 ip_vs_stats.inpps,
1846 ip_vs_stats.outpps,
1847 ip_vs_stats.inbps,
1848 ip_vs_stats.outbps);
1849 spin_unlock_bh(&ip_vs_stats.lock);
1850
1851 return 0;
1852}
1853
1854static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1855{
1856 return single_open(file, ip_vs_stats_show, NULL);
1857}
1858
1859static struct file_operations ip_vs_stats_fops = {
1860 .owner = THIS_MODULE,
1861 .open = ip_vs_stats_seq_open,
1862 .read = seq_read,
1863 .llseek = seq_lseek,
1864 .release = single_release,
1865};
1866
1867#endif
1868
1869/*
1870 * Set timeout values for tcp tcpfin udp in the timeout_table.
1871 */
1872static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
1873{
1874 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
1875 u->tcp_timeout,
1876 u->tcp_fin_timeout,
1877 u->udp_timeout);
1878
1879#ifdef CONFIG_IP_VS_PROTO_TCP
1880 if (u->tcp_timeout) {
1881 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
1882 = u->tcp_timeout * HZ;
1883 }
1884
1885 if (u->tcp_fin_timeout) {
1886 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
1887 = u->tcp_fin_timeout * HZ;
1888 }
1889#endif
1890
1891#ifdef CONFIG_IP_VS_PROTO_UDP
1892 if (u->udp_timeout) {
1893 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
1894 = u->udp_timeout * HZ;
1895 }
1896#endif
1897 return 0;
1898}
1899
1900
1901#define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
1902#define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user))
1903#define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \
1904 sizeof(struct ip_vs_dest_user))
1905#define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
1906#define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user))
1907#define MAX_ARG_LEN SVCDEST_ARG_LEN
1908
1909static unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
1910 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
1911 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
1912 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
1913 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
1914 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
1915 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
1916 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
1917 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
1918 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
1919 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
1920 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
1921};
1922
1923static int
1924do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1925{
1926 int ret;
1927 unsigned char arg[MAX_ARG_LEN];
1928 struct ip_vs_service_user *usvc;
1929 struct ip_vs_service *svc;
1930 struct ip_vs_dest_user *udest;
1931
1932 if (!capable(CAP_NET_ADMIN))
1933 return -EPERM;
1934
1935 if (len != set_arglen[SET_CMDID(cmd)]) {
1936 IP_VS_ERR("set_ctl: len %u != %u\n",
1937 len, set_arglen[SET_CMDID(cmd)]);
1938 return -EINVAL;
1939 }
1940
1941 if (copy_from_user(arg, user, len) != 0)
1942 return -EFAULT;
1943
1944 /* increase the module use count */
1945 ip_vs_use_count_inc();
1946
1947 if (down_interruptible(&__ip_vs_mutex)) {
1948 ret = -ERESTARTSYS;
1949 goto out_dec;
1950 }
1951
1952 if (cmd == IP_VS_SO_SET_FLUSH) {
1953 /* Flush the virtual service */
1954 ret = ip_vs_flush();
1955 goto out_unlock;
1956 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
1957 /* Set timeout values for (tcp tcpfin udp) */
1958 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
1959 goto out_unlock;
1960 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
1961 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
1962 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
1963 goto out_unlock;
1964 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
1965 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
1966 ret = stop_sync_thread(dm->state);
1967 goto out_unlock;
1968 }
1969
1970 usvc = (struct ip_vs_service_user *)arg;
1971 udest = (struct ip_vs_dest_user *)(usvc + 1);
1972
1973 if (cmd == IP_VS_SO_SET_ZERO) {
1974 /* if no service address is set, zero counters in all */
1975 if (!usvc->fwmark && !usvc->addr && !usvc->port) {
1976 ret = ip_vs_zero_all();
1977 goto out_unlock;
1978 }
1979 }
1980
1981 /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
1982 if (usvc->protocol!=IPPROTO_TCP && usvc->protocol!=IPPROTO_UDP) {
1983 IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
1984 usvc->protocol, NIPQUAD(usvc->addr),
1985 ntohs(usvc->port), usvc->sched_name);
1986 ret = -EFAULT;
1987 goto out_unlock;
1988 }
1989
1990 /* Lookup the exact service by <protocol, addr, port> or fwmark */
1991 if (usvc->fwmark == 0)
1992 svc = __ip_vs_service_get(usvc->protocol,
1993 usvc->addr, usvc->port);
1994 else
1995 svc = __ip_vs_svc_fwm_get(usvc->fwmark);
1996
1997 if (cmd != IP_VS_SO_SET_ADD
1998 && (svc == NULL || svc->protocol != usvc->protocol)) {
1999 ret = -ESRCH;
2000 goto out_unlock;
2001 }
2002
2003 switch (cmd) {
2004 case IP_VS_SO_SET_ADD:
2005 if (svc != NULL)
2006 ret = -EEXIST;
2007 else
2008 ret = ip_vs_add_service(usvc, &svc);
2009 break;
2010 case IP_VS_SO_SET_EDIT:
2011 ret = ip_vs_edit_service(svc, usvc);
2012 break;
2013 case IP_VS_SO_SET_DEL:
2014 ret = ip_vs_del_service(svc);
2015 if (!ret)
2016 goto out_unlock;
2017 break;
2018 case IP_VS_SO_SET_ZERO:
2019 ret = ip_vs_zero_service(svc);
2020 break;
2021 case IP_VS_SO_SET_ADDDEST:
2022 ret = ip_vs_add_dest(svc, udest);
2023 break;
2024 case IP_VS_SO_SET_EDITDEST:
2025 ret = ip_vs_edit_dest(svc, udest);
2026 break;
2027 case IP_VS_SO_SET_DELDEST:
2028 ret = ip_vs_del_dest(svc, udest);
2029 break;
2030 default:
2031 ret = -EINVAL;
2032 }
2033
2034 if (svc)
2035 ip_vs_service_put(svc);
2036
2037 out_unlock:
2038 up(&__ip_vs_mutex);
2039 out_dec:
2040 /* decrease the module use count */
2041 ip_vs_use_count_dec();
2042
2043 return ret;
2044}
2045
2046
2047static void
2048ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2049{
2050 spin_lock_bh(&src->lock);
2051 memcpy(dst, src, (char*)&src->lock - (char*)src);
2052 spin_unlock_bh(&src->lock);
2053}
2054
2055static void
2056ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2057{
2058 dst->protocol = src->protocol;
2059 dst->addr = src->addr;
2060 dst->port = src->port;
2061 dst->fwmark = src->fwmark;
2062 strcpy(dst->sched_name, src->scheduler->name);
2063 dst->flags = src->flags;
2064 dst->timeout = src->timeout / HZ;
2065 dst->netmask = src->netmask;
2066 dst->num_dests = src->num_dests;
2067 ip_vs_copy_stats(&dst->stats, &src->stats);
2068}
2069
2070static inline int
2071__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2072 struct ip_vs_get_services __user *uptr)
2073{
2074 int idx, count=0;
2075 struct ip_vs_service *svc;
2076 struct ip_vs_service_entry entry;
2077 int ret = 0;
2078
2079 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2080 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2081 if (count >= get->num_services)
2082 goto out;
2083 ip_vs_copy_service(&entry, svc);
2084 if (copy_to_user(&uptr->entrytable[count],
2085 &entry, sizeof(entry))) {
2086 ret = -EFAULT;
2087 goto out;
2088 }
2089 count++;
2090 }
2091 }
2092
2093 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2094 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2095 if (count >= get->num_services)
2096 goto out;
2097 ip_vs_copy_service(&entry, svc);
2098 if (copy_to_user(&uptr->entrytable[count],
2099 &entry, sizeof(entry))) {
2100 ret = -EFAULT;
2101 goto out;
2102 }
2103 count++;
2104 }
2105 }
2106 out:
2107 return ret;
2108}
2109
2110static inline int
2111__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2112 struct ip_vs_get_dests __user *uptr)
2113{
2114 struct ip_vs_service *svc;
2115 int ret = 0;
2116
2117 if (get->fwmark)
2118 svc = __ip_vs_svc_fwm_get(get->fwmark);
2119 else
2120 svc = __ip_vs_service_get(get->protocol,
2121 get->addr, get->port);
2122 if (svc) {
2123 int count = 0;
2124 struct ip_vs_dest *dest;
2125 struct ip_vs_dest_entry entry;
2126
2127 list_for_each_entry(dest, &svc->destinations, n_list) {
2128 if (count >= get->num_dests)
2129 break;
2130
2131 entry.addr = dest->addr;
2132 entry.port = dest->port;
2133 entry.conn_flags = atomic_read(&dest->conn_flags);
2134 entry.weight = atomic_read(&dest->weight);
2135 entry.u_threshold = dest->u_threshold;
2136 entry.l_threshold = dest->l_threshold;
2137 entry.activeconns = atomic_read(&dest->activeconns);
2138 entry.inactconns = atomic_read(&dest->inactconns);
2139 entry.persistconns = atomic_read(&dest->persistconns);
2140 ip_vs_copy_stats(&entry.stats, &dest->stats);
2141 if (copy_to_user(&uptr->entrytable[count],
2142 &entry, sizeof(entry))) {
2143 ret = -EFAULT;
2144 break;
2145 }
2146 count++;
2147 }
2148 ip_vs_service_put(svc);
2149 } else
2150 ret = -ESRCH;
2151 return ret;
2152}
2153
2154static inline void
2155__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2156{
2157#ifdef CONFIG_IP_VS_PROTO_TCP
2158 u->tcp_timeout =
2159 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2160 u->tcp_fin_timeout =
2161 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2162#endif
2163#ifdef CONFIG_IP_VS_PROTO_UDP
2164 u->udp_timeout =
2165 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2166#endif
2167}
2168
2169
2170#define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2171#define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2172#define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2173#define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2174#define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2175#define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2176#define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2177
2178static unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
2179 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2180 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2181 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2182 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2183 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2184 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2185 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2186};
2187
2188static int
2189do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2190{
2191 unsigned char arg[128];
2192 int ret = 0;
2193
2194 if (!capable(CAP_NET_ADMIN))
2195 return -EPERM;
2196
2197 if (*len < get_arglen[GET_CMDID(cmd)]) {
2198 IP_VS_ERR("get_ctl: len %u < %u\n",
2199 *len, get_arglen[GET_CMDID(cmd)]);
2200 return -EINVAL;
2201 }
2202
2203 if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0)
2204 return -EFAULT;
2205
2206 if (down_interruptible(&__ip_vs_mutex))
2207 return -ERESTARTSYS;
2208
2209 switch (cmd) {
2210 case IP_VS_SO_GET_VERSION:
2211 {
2212 char buf[64];
2213
2214 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2215 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
2216 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2217 ret = -EFAULT;
2218 goto out;
2219 }
2220 *len = strlen(buf)+1;
2221 }
2222 break;
2223
2224 case IP_VS_SO_GET_INFO:
2225 {
2226 struct ip_vs_getinfo info;
2227 info.version = IP_VS_VERSION_CODE;
2228 info.size = IP_VS_CONN_TAB_SIZE;
2229 info.num_services = ip_vs_num_services;
2230 if (copy_to_user(user, &info, sizeof(info)) != 0)
2231 ret = -EFAULT;
2232 }
2233 break;
2234
2235 case IP_VS_SO_GET_SERVICES:
2236 {
2237 struct ip_vs_get_services *get;
2238 int size;
2239
2240 get = (struct ip_vs_get_services *)arg;
2241 size = sizeof(*get) +
2242 sizeof(struct ip_vs_service_entry) * get->num_services;
2243 if (*len != size) {
2244 IP_VS_ERR("length: %u != %u\n", *len, size);
2245 ret = -EINVAL;
2246 goto out;
2247 }
2248 ret = __ip_vs_get_service_entries(get, user);
2249 }
2250 break;
2251
2252 case IP_VS_SO_GET_SERVICE:
2253 {
2254 struct ip_vs_service_entry *entry;
2255 struct ip_vs_service *svc;
2256
2257 entry = (struct ip_vs_service_entry *)arg;
2258 if (entry->fwmark)
2259 svc = __ip_vs_svc_fwm_get(entry->fwmark);
2260 else
2261 svc = __ip_vs_service_get(entry->protocol,
2262 entry->addr, entry->port);
2263 if (svc) {
2264 ip_vs_copy_service(entry, svc);
2265 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2266 ret = -EFAULT;
2267 ip_vs_service_put(svc);
2268 } else
2269 ret = -ESRCH;
2270 }
2271 break;
2272
2273 case IP_VS_SO_GET_DESTS:
2274 {
2275 struct ip_vs_get_dests *get;
2276 int size;
2277
2278 get = (struct ip_vs_get_dests *)arg;
2279 size = sizeof(*get) +
2280 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2281 if (*len != size) {
2282 IP_VS_ERR("length: %u != %u\n", *len, size);
2283 ret = -EINVAL;
2284 goto out;
2285 }
2286 ret = __ip_vs_get_dest_entries(get, user);
2287 }
2288 break;
2289
2290 case IP_VS_SO_GET_TIMEOUT:
2291 {
2292 struct ip_vs_timeout_user t;
2293
2294 __ip_vs_get_timeouts(&t);
2295 if (copy_to_user(user, &t, sizeof(t)) != 0)
2296 ret = -EFAULT;
2297 }
2298 break;
2299
2300 case IP_VS_SO_GET_DAEMON:
2301 {
2302 struct ip_vs_daemon_user d[2];
2303
2304 memset(&d, 0, sizeof(d));
2305 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2306 d[0].state = IP_VS_STATE_MASTER;
2307 strcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn);
2308 d[0].syncid = ip_vs_master_syncid;
2309 }
2310 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2311 d[1].state = IP_VS_STATE_BACKUP;
2312 strcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn);
2313 d[1].syncid = ip_vs_backup_syncid;
2314 }
2315 if (copy_to_user(user, &d, sizeof(d)) != 0)
2316 ret = -EFAULT;
2317 }
2318 break;
2319
2320 default:
2321 ret = -EINVAL;
2322 }
2323
2324 out:
2325 up(&__ip_vs_mutex);
2326 return ret;
2327}
2328
2329
2330static struct nf_sockopt_ops ip_vs_sockopts = {
2331 .pf = PF_INET,
2332 .set_optmin = IP_VS_BASE_CTL,
2333 .set_optmax = IP_VS_SO_SET_MAX+1,
2334 .set = do_ip_vs_set_ctl,
2335 .get_optmin = IP_VS_BASE_CTL,
2336 .get_optmax = IP_VS_SO_GET_MAX+1,
2337 .get = do_ip_vs_get_ctl,
2338};
2339
2340
2341int ip_vs_control_init(void)
2342{
2343 int ret;
2344 int idx;
2345
2346 EnterFunction(2);
2347
2348 ret = nf_register_sockopt(&ip_vs_sockopts);
2349 if (ret) {
2350 IP_VS_ERR("cannot register sockopt.\n");
2351 return ret;
2352 }
2353
2354 proc_net_fops_create("ip_vs", 0, &ip_vs_info_fops);
2355 proc_net_fops_create("ip_vs_stats",0, &ip_vs_stats_fops);
2356
2357 sysctl_header = register_sysctl_table(vs_root_table, 0);
2358
2359 /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
2360 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2361 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
2362 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
2363 }
2364 for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) {
2365 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
2366 }
2367
2368 memset(&ip_vs_stats, 0, sizeof(ip_vs_stats));
2369 spin_lock_init(&ip_vs_stats.lock);
2370 ip_vs_new_estimator(&ip_vs_stats);
2371
2372 /* Hook the defense timer */
2373 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
2374
2375 LeaveFunction(2);
2376 return 0;
2377}
2378
2379
2380void ip_vs_control_cleanup(void)
2381{
2382 EnterFunction(2);
2383 ip_vs_trash_cleanup();
2384 cancel_rearming_delayed_work(&defense_work);
2385 ip_vs_kill_estimator(&ip_vs_stats);
2386 unregister_sysctl_table(sysctl_header);
2387 proc_net_remove("ip_vs_stats");
2388 proc_net_remove("ip_vs");
2389 nf_unregister_sockopt(&ip_vs_sockopts);
2390 LeaveFunction(2);
2391}