]> bbs.cooldavid.org Git - net-next-2.6.git/blob - net/ipv4/ipvs/ip_vs_ctl.c
IPVS: Add genetlink interface implementation
[net-next-2.6.git] / net / ipv4 / ipvs / ip_vs_ctl.c
1 /*
2  * IPVS         An implementation of the IP virtual server support for the
3  *              LINUX operating system.  IPVS is now implemented as a module
4  *              over the NetFilter framework. IPVS can be used to build a
5  *              high-performance and highly available server based on a
6  *              cluster of servers.
7  *
8  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
9  *              Peter Kese <peter.kese@ijs.si>
10  *              Julian Anastasov <ja@ssi.bg>
11  *
12  *              This program is free software; you can redistribute it and/or
13  *              modify it under the terms of the GNU General Public License
14  *              as published by the Free Software Foundation; either version
15  *              2 of the License, or (at your option) any later version.
16  *
17  * Changes:
18  *
19  */
20
21 #include <linux/module.h>
22 #include <linux/init.h>
23 #include <linux/types.h>
24 #include <linux/capability.h>
25 #include <linux/fs.h>
26 #include <linux/sysctl.h>
27 #include <linux/proc_fs.h>
28 #include <linux/workqueue.h>
29 #include <linux/swap.h>
30 #include <linux/seq_file.h>
31
32 #include <linux/netfilter.h>
33 #include <linux/netfilter_ipv4.h>
34 #include <linux/mutex.h>
35
36 #include <net/net_namespace.h>
37 #include <net/ip.h>
38 #include <net/route.h>
39 #include <net/sock.h>
40 #include <net/genetlink.h>
41
42 #include <asm/uaccess.h>
43
44 #include <net/ip_vs.h>
45
46 /* mutex (not a semaphore) for IPVS sockopts. And, [gs]etsockopt may sleep. */
47 static DEFINE_MUTEX(__ip_vs_mutex);
48
49 /* rwlock for the virtual service hash tables; read-locked by ip_vs_service_get() */
50 static DEFINE_RWLOCK(__ip_vs_svc_lock);
51
52 /* rwlock for ip_vs_rtable, the hash table with the real services */
53 static DEFINE_RWLOCK(__ip_vs_rs_lock);
54
55 /* lock for state and timeout tables; write-locked around the secure_tcp switch in update_defense_level() */
56 static DEFINE_RWLOCK(__ip_vs_securetcp_lock);
57
58 /* lock for drop entry handling (the drop_entry section of update_defense_level()) */
59 static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
60
61 /* lock for drop packet handling (the drop_packet section of update_defense_level()) */
62 static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
63
64 /* 1/rate drop and drop-entry variables; all three are recomputed by update_defense_level() */
65 int ip_vs_drop_rate = 0;
66 int ip_vs_drop_counter = 0;
67 static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
68
69 /* number of virtual services */
70 static int ip_vs_num_services = 0;
71
72 /* sysctl variables; drop_entry, drop_packet and secure_tcp are also rewritten (1 <-> 2) by update_defense_level() */
73 static int sysctl_ip_vs_drop_entry = 0;
74 static int sysctl_ip_vs_drop_packet = 0;
75 static int sysctl_ip_vs_secure_tcp = 0;
76 static int sysctl_ip_vs_amemthresh = 1024;
77 static int sysctl_ip_vs_am_droprate = 10;
78 int sysctl_ip_vs_cache_bypass = 0;
79 int sysctl_ip_vs_expire_nodest_conn = 0;
80 int sysctl_ip_vs_expire_quiescent_template = 0;
81 int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
82 int sysctl_ip_vs_nat_icmp_send = 0;
83
84
#ifdef CONFIG_IP_VS_DEBUG
/* debug verbosity; only present when CONFIG_IP_VS_DEBUG is defined */
static int sysctl_ip_vs_debug_level;

/*
 *	Returns the current value of sysctl_ip_vs_debug_level.
 */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
#endif /* CONFIG_IP_VS_DEBUG */
93
94 /*
95  *      update_defense_level is called from keventd and from sysctl,
96  *      so it needs to protect itself from softirqs
97  */
98 static void update_defense_level(void)
99 {
100         struct sysinfo i;
101         static int old_secure_tcp = 0;
102         int availmem;
103         int nomem;
104         int to_change = -1;
105
106         /* we only count free and buffered memory (in pages) */
107         si_meminfo(&i);
108         availmem = i.freeram + i.bufferram;
109         /* however in linux 2.5 the i.bufferram is total page cache size,
110            we need adjust it */
111         /* si_swapinfo(&i); */
112         /* availmem = availmem - (i.totalswap - i.freeswap); */
113
114         nomem = (availmem < sysctl_ip_vs_amemthresh);
115
116         local_bh_disable();
117
118         /* drop_entry */
119         spin_lock(&__ip_vs_dropentry_lock);
120         switch (sysctl_ip_vs_drop_entry) {
121         case 0:
122                 atomic_set(&ip_vs_dropentry, 0);
123                 break;
124         case 1:
125                 if (nomem) {
126                         atomic_set(&ip_vs_dropentry, 1);
127                         sysctl_ip_vs_drop_entry = 2;
128                 } else {
129                         atomic_set(&ip_vs_dropentry, 0);
130                 }
131                 break;
132         case 2:
133                 if (nomem) {
134                         atomic_set(&ip_vs_dropentry, 1);
135                 } else {
136                         atomic_set(&ip_vs_dropentry, 0);
137                         sysctl_ip_vs_drop_entry = 1;
138                 };
139                 break;
140         case 3:
141                 atomic_set(&ip_vs_dropentry, 1);
142                 break;
143         }
144         spin_unlock(&__ip_vs_dropentry_lock);
145
146         /* drop_packet */
147         spin_lock(&__ip_vs_droppacket_lock);
148         switch (sysctl_ip_vs_drop_packet) {
149         case 0:
150                 ip_vs_drop_rate = 0;
151                 break;
152         case 1:
153                 if (nomem) {
154                         ip_vs_drop_rate = ip_vs_drop_counter
155                                 = sysctl_ip_vs_amemthresh /
156                                 (sysctl_ip_vs_amemthresh-availmem);
157                         sysctl_ip_vs_drop_packet = 2;
158                 } else {
159                         ip_vs_drop_rate = 0;
160                 }
161                 break;
162         case 2:
163                 if (nomem) {
164                         ip_vs_drop_rate = ip_vs_drop_counter
165                                 = sysctl_ip_vs_amemthresh /
166                                 (sysctl_ip_vs_amemthresh-availmem);
167                 } else {
168                         ip_vs_drop_rate = 0;
169                         sysctl_ip_vs_drop_packet = 1;
170                 }
171                 break;
172         case 3:
173                 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
174                 break;
175         }
176         spin_unlock(&__ip_vs_droppacket_lock);
177
178         /* secure_tcp */
179         write_lock(&__ip_vs_securetcp_lock);
180         switch (sysctl_ip_vs_secure_tcp) {
181         case 0:
182                 if (old_secure_tcp >= 2)
183                         to_change = 0;
184                 break;
185         case 1:
186                 if (nomem) {
187                         if (old_secure_tcp < 2)
188                                 to_change = 1;
189                         sysctl_ip_vs_secure_tcp = 2;
190                 } else {
191                         if (old_secure_tcp >= 2)
192                                 to_change = 0;
193                 }
194                 break;
195         case 2:
196                 if (nomem) {
197                         if (old_secure_tcp < 2)
198                                 to_change = 1;
199                 } else {
200                         if (old_secure_tcp >= 2)
201                                 to_change = 0;
202                         sysctl_ip_vs_secure_tcp = 1;
203                 }
204                 break;
205         case 3:
206                 if (old_secure_tcp < 2)
207                         to_change = 1;
208                 break;
209         }
210         old_secure_tcp = sysctl_ip_vs_secure_tcp;
211         if (to_change >= 0)
212                 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
213         write_unlock(&__ip_vs_securetcp_lock);
214
215         local_bh_enable();
216 }
217
218
219 /*
220  *      Timer for checking the defense
221  */
222 #define DEFENSE_TIMER_PERIOD    1*HZ
223 static void defense_work_handler(struct work_struct *work);
224 static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
225
226 static void defense_work_handler(struct work_struct *work)
227 {
228         update_defense_level();
229         if (atomic_read(&ip_vs_dropentry))
230                 ip_vs_random_dropentry();
231
232         schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
233 }
234
235 int
236 ip_vs_use_count_inc(void)
237 {
238         return try_module_get(THIS_MODULE);
239 }
240
241 void
242 ip_vs_use_count_dec(void)
243 {
244         module_put(THIS_MODULE);
245 }
246
247
248 /*
249  *      Hash tables for virtual service lookups (1 << IP_VS_SVC_TAB_BITS buckets each)
250  */
251 #define IP_VS_SVC_TAB_BITS 8
252 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
253 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
254
255 /* the service table hashed by <protocol, addr, port>; services are linked through s_list */
256 static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
257 /* the service table hashed by fwmark; services are linked through f_list */
258 static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
259
260 /*
261  *      Hash table for real service lookups (1 << IP_VS_RTAB_BITS buckets, dests linked through d_list)
262  */
263 #define IP_VS_RTAB_BITS 4
264 #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
265 #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
266
267 static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
268
269 /*
270  *      Trash for destinations (dests are linked through n_list; see ip_vs_trash_get_dest())
271  */
272 static LIST_HEAD(ip_vs_dest_trash);
273
274 /*
275  *      FTP & NULL virtual service counters; ip_vs_service_get() only tries its FTP and port-zero fallbacks when they are non-zero
276  */
277 static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
278 static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
279
280
281 /*
282  *      Returns hash value for virtual service
283  */
284 static __inline__ unsigned
285 ip_vs_svc_hashkey(unsigned proto, __be32 addr, __be16 port)
286 {
287         register unsigned porth = ntohs(port);
288
289         return (proto^ntohl(addr)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
290                 & IP_VS_SVC_TAB_MASK;
291 }
292
293 /*
294  *      Returns hash value of fwmark for virtual service lookup
295  */
296 static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
297 {
298         return fwmark & IP_VS_SVC_TAB_MASK;
299 }
300
301 /*
302  *      Hashes a service in the ip_vs_svc_table by <proto,addr,port>
303  *      or in the ip_vs_svc_fwm_table by fwmark.
304  *      Should be called with locked tables.
305  */
306 static int ip_vs_svc_hash(struct ip_vs_service *svc)
307 {
308         unsigned hash;
309
310         if (svc->flags & IP_VS_SVC_F_HASHED) {
311                 IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
312                           "called from %p\n", __builtin_return_address(0));
313                 return 0;
314         }
315
316         if (svc->fwmark == 0) {
317                 /*
318                  *  Hash it by <protocol,addr,port> in ip_vs_svc_table
319                  */
320                 hash = ip_vs_svc_hashkey(svc->protocol, svc->addr, svc->port);
321                 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
322         } else {
323                 /*
324                  *  Hash it by fwmark in ip_vs_svc_fwm_table
325                  */
326                 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
327                 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
328         }
329
330         svc->flags |= IP_VS_SVC_F_HASHED;
331         /* increase its refcnt because it is referenced by the svc table */
332         atomic_inc(&svc->refcnt);
333         return 1;
334 }
335
336
337 /*
338  *      Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
339  *      Should be called with locked tables.
340  */
341 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
342 {
343         if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
344                 IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
345                           "called from %p\n", __builtin_return_address(0));
346                 return 0;
347         }
348
349         if (svc->fwmark == 0) {
350                 /* Remove it from the ip_vs_svc_table table */
351                 list_del(&svc->s_list);
352         } else {
353                 /* Remove it from the ip_vs_svc_fwm_table table */
354                 list_del(&svc->f_list);
355         }
356
357         svc->flags &= ~IP_VS_SVC_F_HASHED;
358         atomic_dec(&svc->refcnt);
359         return 1;
360 }
361
362
363 /*
364  *      Get service by {proto,addr,port} in the service table.
365  */
366 static __inline__ struct ip_vs_service *
367 __ip_vs_service_get(__u16 protocol, __be32 vaddr, __be16 vport)
368 {
369         unsigned hash;
370         struct ip_vs_service *svc;
371
372         /* Check for "full" addressed entries */
373         hash = ip_vs_svc_hashkey(protocol, vaddr, vport);
374
375         list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
376                 if ((svc->addr == vaddr)
377                     && (svc->port == vport)
378                     && (svc->protocol == protocol)) {
379                         /* HIT */
380                         atomic_inc(&svc->usecnt);
381                         return svc;
382                 }
383         }
384
385         return NULL;
386 }
387
388
389 /*
390  *      Get service by {fwmark} in the service table.
391  */
392 static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark)
393 {
394         unsigned hash;
395         struct ip_vs_service *svc;
396
397         /* Check for fwmark addressed entries */
398         hash = ip_vs_svc_fwm_hashkey(fwmark);
399
400         list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
401                 if (svc->fwmark == fwmark) {
402                         /* HIT */
403                         atomic_inc(&svc->usecnt);
404                         return svc;
405                 }
406         }
407
408         return NULL;
409 }
410
411 struct ip_vs_service *
412 ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport)
413 {
414         struct ip_vs_service *svc;
415
416         read_lock(&__ip_vs_svc_lock);
417
418         /*
419          *      Check the table hashed by fwmark first
420          */
421         if (fwmark && (svc = __ip_vs_svc_fwm_get(fwmark)))
422                 goto out;
423
424         /*
425          *      Check the table hashed by <protocol,addr,port>
426          *      for "full" addressed entries
427          */
428         svc = __ip_vs_service_get(protocol, vaddr, vport);
429
430         if (svc == NULL
431             && protocol == IPPROTO_TCP
432             && atomic_read(&ip_vs_ftpsvc_counter)
433             && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
434                 /*
435                  * Check if ftp service entry exists, the packet
436                  * might belong to FTP data connections.
437                  */
438                 svc = __ip_vs_service_get(protocol, vaddr, FTPPORT);
439         }
440
441         if (svc == NULL
442             && atomic_read(&ip_vs_nullsvc_counter)) {
443                 /*
444                  * Check if the catch-all port (port zero) exists
445                  */
446                 svc = __ip_vs_service_get(protocol, vaddr, 0);
447         }
448
449   out:
450         read_unlock(&__ip_vs_svc_lock);
451
452         IP_VS_DBG(9, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n",
453                   fwmark, ip_vs_proto_name(protocol),
454                   NIPQUAD(vaddr), ntohs(vport),
455                   svc?"hit":"not hit");
456
457         return svc;
458 }
459
460
461 static inline void
462 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
463 {
464         atomic_inc(&svc->refcnt);
465         dest->svc = svc;
466 }
467
468 static inline void
469 __ip_vs_unbind_svc(struct ip_vs_dest *dest)
470 {
471         struct ip_vs_service *svc = dest->svc;
472
473         dest->svc = NULL;
474         if (atomic_dec_and_test(&svc->refcnt))
475                 kfree(svc);
476 }
477
478
479 /*
480  *      Returns hash value for real service
481  */
482 static __inline__ unsigned ip_vs_rs_hashkey(__be32 addr, __be16 port)
483 {
484         register unsigned porth = ntohs(port);
485
486         return (ntohl(addr)^(porth>>IP_VS_RTAB_BITS)^porth)
487                 & IP_VS_RTAB_MASK;
488 }
489
490 /*
491  *      Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
492  *      should be called with locked tables.
493  */
494 static int ip_vs_rs_hash(struct ip_vs_dest *dest)
495 {
496         unsigned hash;
497
498         if (!list_empty(&dest->d_list)) {
499                 return 0;
500         }
501
502         /*
503          *      Hash by proto,addr,port,
504          *      which are the parameters of the real service.
505          */
506         hash = ip_vs_rs_hashkey(dest->addr, dest->port);
507         list_add(&dest->d_list, &ip_vs_rtable[hash]);
508
509         return 1;
510 }
511
512 /*
513  *      UNhashes ip_vs_dest from ip_vs_rtable.
514  *      should be called with locked tables.
515  */
516 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
517 {
518         /*
519          * Remove it from the ip_vs_rtable table.
520          */
521         if (!list_empty(&dest->d_list)) {
522                 list_del(&dest->d_list);
523                 INIT_LIST_HEAD(&dest->d_list);
524         }
525
526         return 1;
527 }
528
529 /*
530  *      Lookup real service by <proto,addr,port> in the real service table.
531  */
532 struct ip_vs_dest *
533 ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport)
534 {
535         unsigned hash;
536         struct ip_vs_dest *dest;
537
538         /*
539          *      Check for "full" addressed entries
540          *      Return the first found entry
541          */
542         hash = ip_vs_rs_hashkey(daddr, dport);
543
544         read_lock(&__ip_vs_rs_lock);
545         list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
546                 if ((dest->addr == daddr)
547                     && (dest->port == dport)
548                     && ((dest->protocol == protocol) ||
549                         dest->vfwmark)) {
550                         /* HIT */
551                         read_unlock(&__ip_vs_rs_lock);
552                         return dest;
553                 }
554         }
555         read_unlock(&__ip_vs_rs_lock);
556
557         return NULL;
558 }
559
560 /*
561  *      Lookup destination by {addr,port} in the given service
562  */
563 static struct ip_vs_dest *
564 ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
565 {
566         struct ip_vs_dest *dest;
567
568         /*
569          * Find the destination for the given service
570          */
571         list_for_each_entry(dest, &svc->destinations, n_list) {
572                 if ((dest->addr == daddr) && (dest->port == dport)) {
573                         /* HIT */
574                         return dest;
575                 }
576         }
577
578         return NULL;
579 }
580
581 /*
582  * Find destination by {daddr,dport,vaddr,protocol}
583  * Cretaed to be used in ip_vs_process_message() in
584  * the backup synchronization daemon. It finds the
585  * destination to be bound to the received connection
586  * on the backup.
587  *
588  * ip_vs_lookup_real_service() looked promissing, but
589  * seems not working as expected.
590  */
591 struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport,
592                                     __be32 vaddr, __be16 vport, __u16 protocol)
593 {
594         struct ip_vs_dest *dest;
595         struct ip_vs_service *svc;
596
597         svc = ip_vs_service_get(0, protocol, vaddr, vport);
598         if (!svc)
599                 return NULL;
600         dest = ip_vs_lookup_dest(svc, daddr, dport);
601         if (dest)
602                 atomic_inc(&dest->refcnt);
603         ip_vs_service_put(svc);
604         return dest;
605 }
606
607 /*
608  *  Lookup dest by {svc,addr,port} in the destination trash.
609  *  The destination trash is used to hold the destinations that are removed
610  *  from the service table but are still referenced by some conn entries.
611  *  The reason to add the destination trash is when the dest is temporary
612  *  down (either by administrator or by monitor program), the dest can be
613  *  picked back from the trash, the remaining connections to the dest can
614  *  continue, and the counting information of the dest is also useful for
615  *  scheduling.
616  */
617 static struct ip_vs_dest *
618 ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
619 {
620         struct ip_vs_dest *dest, *nxt;
621
622         /*
623          * Find the destination in trash
624          */
625         list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
626                 IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, "
627                           "dest->refcnt=%d\n",
628                           dest->vfwmark,
629                           NIPQUAD(dest->addr), ntohs(dest->port),
630                           atomic_read(&dest->refcnt));
631                 if (dest->addr == daddr &&
632                     dest->port == dport &&
633                     dest->vfwmark == svc->fwmark &&
634                     dest->protocol == svc->protocol &&
635                     (svc->fwmark ||
636                      (dest->vaddr == svc->addr &&
637                       dest->vport == svc->port))) {
638                         /* HIT */
639                         return dest;
640                 }
641
642                 /*
643                  * Try to purge the destination from trash if not referenced
644                  */
645                 if (atomic_read(&dest->refcnt) == 1) {
646                         IP_VS_DBG(3, "Removing destination %u/%u.%u.%u.%u:%u "
647                                   "from trash\n",
648                                   dest->vfwmark,
649                                   NIPQUAD(dest->addr), ntohs(dest->port));
650                         list_del(&dest->n_list);
651                         ip_vs_dst_reset(dest);
652                         __ip_vs_unbind_svc(dest);
653                         kfree(dest);
654                 }
655         }
656
657         return NULL;
658 }
659
660
661 /*
662  *  Clean up all the destinations in the trash
663  *  Called by the ip_vs_control_cleanup()
664  *
665  *  When the ip_vs_control_clearup is activated by ipvs module exit,
666  *  the service tables must have been flushed and all the connections
667  *  are expired, and the refcnt of each destination in the trash must
668  *  be 1, so we simply release them here.
669  */
670 static void ip_vs_trash_cleanup(void)
671 {
672         struct ip_vs_dest *dest, *nxt;
673
674         list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
675                 list_del(&dest->n_list);
676                 ip_vs_dst_reset(dest);
677                 __ip_vs_unbind_svc(dest);
678                 kfree(dest);
679         }
680 }
681
682
683 static void
684 ip_vs_zero_stats(struct ip_vs_stats *stats)
685 {
686         spin_lock_bh(&stats->lock);
687
688         stats->conns = 0;
689         stats->inpkts = 0;
690         stats->outpkts = 0;
691         stats->inbytes = 0;
692         stats->outbytes = 0;
693
694         stats->cps = 0;
695         stats->inpps = 0;
696         stats->outpps = 0;
697         stats->inbps = 0;
698         stats->outbps = 0;
699
700         ip_vs_zero_estimator(stats);
701
702         spin_unlock_bh(&stats->lock);
703 }
704
705 /*
706  *      Update a destination in the given service
707  */
708 static void
709 __ip_vs_update_dest(struct ip_vs_service *svc,
710                     struct ip_vs_dest *dest, struct ip_vs_dest_user *udest)
711 {
712         int conn_flags;
713
714         /* set the weight and the flags */
715         atomic_set(&dest->weight, udest->weight);
716         conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
717
718         /* check if local node and update the flags */
719         if (inet_addr_type(&init_net, udest->addr) == RTN_LOCAL) {
720                 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
721                         | IP_VS_CONN_F_LOCALNODE;
722         }
723
724         /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
725         if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
726                 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
727         } else {
728                 /*
729                  *    Put the real service in ip_vs_rtable if not present.
730                  *    For now only for NAT!
731                  */
732                 write_lock_bh(&__ip_vs_rs_lock);
733                 ip_vs_rs_hash(dest);
734                 write_unlock_bh(&__ip_vs_rs_lock);
735         }
736         atomic_set(&dest->conn_flags, conn_flags);
737
738         /* bind the service */
739         if (!dest->svc) {
740                 __ip_vs_bind_svc(dest, svc);
741         } else {
742                 if (dest->svc != svc) {
743                         __ip_vs_unbind_svc(dest);
744                         ip_vs_zero_stats(&dest->stats);
745                         __ip_vs_bind_svc(dest, svc);
746                 }
747         }
748
749         /* set the dest status flags */
750         dest->flags |= IP_VS_DEST_F_AVAILABLE;
751
752         if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
753                 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
754         dest->u_threshold = udest->u_threshold;
755         dest->l_threshold = udest->l_threshold;
756 }
757
758
759 /*
760  *      Create a destination for the given service
761  */
762 static int
763 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest,
764                struct ip_vs_dest **dest_p)
765 {
766         struct ip_vs_dest *dest;
767         unsigned atype;
768
769         EnterFunction(2);
770
771         atype = inet_addr_type(&init_net, udest->addr);
772         if (atype != RTN_LOCAL && atype != RTN_UNICAST)
773                 return -EINVAL;
774
775         dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
776         if (dest == NULL) {
777                 IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
778                 return -ENOMEM;
779         }
780
781         dest->protocol = svc->protocol;
782         dest->vaddr = svc->addr;
783         dest->vport = svc->port;
784         dest->vfwmark = svc->fwmark;
785         dest->addr = udest->addr;
786         dest->port = udest->port;
787
788         atomic_set(&dest->activeconns, 0);
789         atomic_set(&dest->inactconns, 0);
790         atomic_set(&dest->persistconns, 0);
791         atomic_set(&dest->refcnt, 0);
792
793         INIT_LIST_HEAD(&dest->d_list);
794         spin_lock_init(&dest->dst_lock);
795         spin_lock_init(&dest->stats.lock);
796         __ip_vs_update_dest(svc, dest, udest);
797         ip_vs_new_estimator(&dest->stats);
798
799         *dest_p = dest;
800
801         LeaveFunction(2);
802         return 0;
803 }
804
805
806 /*
807  *      Add a destination into an existing service
808  */
809 static int
810 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
811 {
812         struct ip_vs_dest *dest;
813         __be32 daddr = udest->addr;
814         __be16 dport = udest->port;
815         int ret;
816
817         EnterFunction(2);
818
819         if (udest->weight < 0) {
820                 IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
821                 return -ERANGE;
822         }
823
824         if (udest->l_threshold > udest->u_threshold) {
825                 IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than "
826                           "upper threshold\n");
827                 return -ERANGE;
828         }
829
830         /*
831          * Check if the dest already exists in the list
832          */
833         dest = ip_vs_lookup_dest(svc, daddr, dport);
834         if (dest != NULL) {
835                 IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
836                 return -EEXIST;
837         }
838
839         /*
840          * Check if the dest already exists in the trash and
841          * is from the same service
842          */
843         dest = ip_vs_trash_get_dest(svc, daddr, dport);
844         if (dest != NULL) {
845                 IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
846                           "dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
847                           NIPQUAD(daddr), ntohs(dport),
848                           atomic_read(&dest->refcnt),
849                           dest->vfwmark,
850                           NIPQUAD(dest->vaddr),
851                           ntohs(dest->vport));
852                 __ip_vs_update_dest(svc, dest, udest);
853
854                 /*
855                  * Get the destination from the trash
856                  */
857                 list_del(&dest->n_list);
858
859                 ip_vs_new_estimator(&dest->stats);
860
861                 write_lock_bh(&__ip_vs_svc_lock);
862
863                 /*
864                  * Wait until all other svc users go away.
865                  */
866                 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
867
868                 list_add(&dest->n_list, &svc->destinations);
869                 svc->num_dests++;
870
871                 /* call the update_service function of its scheduler */
872                 svc->scheduler->update_service(svc);
873
874                 write_unlock_bh(&__ip_vs_svc_lock);
875                 return 0;
876         }
877
878         /*
879          * Allocate and initialize the dest structure
880          */
881         ret = ip_vs_new_dest(svc, udest, &dest);
882         if (ret) {
883                 return ret;
884         }
885
886         /*
887          * Add the dest entry into the list
888          */
889         atomic_inc(&dest->refcnt);
890
891         write_lock_bh(&__ip_vs_svc_lock);
892
893         /*
894          * Wait until all other svc users go away.
895          */
896         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
897
898         list_add(&dest->n_list, &svc->destinations);
899         svc->num_dests++;
900
901         /* call the update_service function of its scheduler */
902         svc->scheduler->update_service(svc);
903
904         write_unlock_bh(&__ip_vs_svc_lock);
905
906         LeaveFunction(2);
907
908         return 0;
909 }
910
911
912 /*
913  *      Edit a destination in the given service
914  */
915 static int
916 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
917 {
918         struct ip_vs_dest *dest;
919         __be32 daddr = udest->addr;
920         __be16 dport = udest->port;
921
922         EnterFunction(2);
923
924         if (udest->weight < 0) {
925                 IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
926                 return -ERANGE;
927         }
928
929         if (udest->l_threshold > udest->u_threshold) {
930                 IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than "
931                           "upper threshold\n");
932                 return -ERANGE;
933         }
934
935         /*
936          *  Lookup the destination list
937          */
938         dest = ip_vs_lookup_dest(svc, daddr, dport);
939         if (dest == NULL) {
940                 IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
941                 return -ENOENT;
942         }
943
944         __ip_vs_update_dest(svc, dest, udest);
945
946         write_lock_bh(&__ip_vs_svc_lock);
947
948         /* Wait until all other svc users go away */
949         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
950
951         /* call the update_service, because server weight may be changed */
952         svc->scheduler->update_service(svc);
953
954         write_unlock_bh(&__ip_vs_svc_lock);
955
956         LeaveFunction(2);
957
958         return 0;
959 }
960
961
962 /*
963  *      Delete a destination (must be already unlinked from the service)
964  */
965 static void __ip_vs_del_dest(struct ip_vs_dest *dest)
966 {
967         ip_vs_kill_estimator(&dest->stats);
968
969         /*
970          *  Remove it from the d-linked list with the real services.
971          */
972         write_lock_bh(&__ip_vs_rs_lock);
973         ip_vs_rs_unhash(dest);
974         write_unlock_bh(&__ip_vs_rs_lock);
975
976         /*
977          *  Decrease the refcnt of the dest, and free the dest
978          *  if nobody refers to it (refcnt=0). Otherwise, throw
979          *  the destination into the trash.
980          */
981         if (atomic_dec_and_test(&dest->refcnt)) {
982                 ip_vs_dst_reset(dest);
983                 /* simply decrease svc->refcnt here, let the caller check
984                    and release the service if nobody refers to it.
985                    Only user context can release destination and service,
986                    and only one user context can update virtual service at a
987                    time, so the operation here is OK */
988                 atomic_dec(&dest->svc->refcnt);
989                 kfree(dest);
990         } else {
991                 IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, "
992                           "dest->refcnt=%d\n",
993                           NIPQUAD(dest->addr), ntohs(dest->port),
994                           atomic_read(&dest->refcnt));
995                 list_add(&dest->n_list, &ip_vs_dest_trash);
996                 atomic_inc(&dest->refcnt);
997         }
998 }
999
1000
1001 /*
1002  *      Unlink a destination from the given service
1003  */
1004 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1005                                 struct ip_vs_dest *dest,
1006                                 int svcupd)
1007 {
1008         dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1009
1010         /*
1011          *  Remove it from the d-linked destination list.
1012          */
1013         list_del(&dest->n_list);
1014         svc->num_dests--;
1015         if (svcupd) {
1016                 /*
1017                  *  Call the update_service function of its scheduler
1018                  */
1019                 svc->scheduler->update_service(svc);
1020         }
1021 }
1022
1023
1024 /*
1025  *      Delete a destination server in the given service
1026  */
1027 static int
1028 ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_dest_user *udest)
1029 {
1030         struct ip_vs_dest *dest;
1031         __be32 daddr = udest->addr;
1032         __be16 dport = udest->port;
1033
1034         EnterFunction(2);
1035
1036         dest = ip_vs_lookup_dest(svc, daddr, dport);
1037         if (dest == NULL) {
1038                 IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
1039                 return -ENOENT;
1040         }
1041
1042         write_lock_bh(&__ip_vs_svc_lock);
1043
1044         /*
1045          *      Wait until all other svc users go away.
1046          */
1047         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1048
1049         /*
1050          *      Unlink dest from the service
1051          */
1052         __ip_vs_unlink_dest(svc, dest, 1);
1053
1054         write_unlock_bh(&__ip_vs_svc_lock);
1055
1056         /*
1057          *      Delete the destination
1058          */
1059         __ip_vs_del_dest(dest);
1060
1061         LeaveFunction(2);
1062
1063         return 0;
1064 }
1065
1066
1067 /*
1068  *      Add a service into the service hash table
1069  */
1070 static int
1071 ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p)
1072 {
1073         int ret = 0;
1074         struct ip_vs_scheduler *sched = NULL;
1075         struct ip_vs_service *svc = NULL;
1076
1077         /* increase the module use count */
1078         ip_vs_use_count_inc();
1079
1080         /* Lookup the scheduler by 'u->sched_name' */
1081         sched = ip_vs_scheduler_get(u->sched_name);
1082         if (sched == NULL) {
1083                 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1084                            u->sched_name);
1085                 ret = -ENOENT;
1086                 goto out_mod_dec;
1087         }
1088
1089         svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
1090         if (svc == NULL) {
1091                 IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
1092                 ret = -ENOMEM;
1093                 goto out_err;
1094         }
1095
1096         /* I'm the first user of the service */
1097         atomic_set(&svc->usecnt, 1);
1098         atomic_set(&svc->refcnt, 0);
1099
1100         svc->protocol = u->protocol;
1101         svc->addr = u->addr;
1102         svc->port = u->port;
1103         svc->fwmark = u->fwmark;
1104         svc->flags = u->flags;
1105         svc->timeout = u->timeout * HZ;
1106         svc->netmask = u->netmask;
1107
1108         INIT_LIST_HEAD(&svc->destinations);
1109         rwlock_init(&svc->sched_lock);
1110         spin_lock_init(&svc->stats.lock);
1111
1112         /* Bind the scheduler */
1113         ret = ip_vs_bind_scheduler(svc, sched);
1114         if (ret)
1115                 goto out_err;
1116         sched = NULL;
1117
1118         /* Update the virtual service counters */
1119         if (svc->port == FTPPORT)
1120                 atomic_inc(&ip_vs_ftpsvc_counter);
1121         else if (svc->port == 0)
1122                 atomic_inc(&ip_vs_nullsvc_counter);
1123
1124         ip_vs_new_estimator(&svc->stats);
1125         ip_vs_num_services++;
1126
1127         /* Hash the service into the service table */
1128         write_lock_bh(&__ip_vs_svc_lock);
1129         ip_vs_svc_hash(svc);
1130         write_unlock_bh(&__ip_vs_svc_lock);
1131
1132         *svc_p = svc;
1133         return 0;
1134
1135   out_err:
1136         if (svc != NULL) {
1137                 if (svc->scheduler)
1138                         ip_vs_unbind_scheduler(svc);
1139                 if (svc->inc) {
1140                         local_bh_disable();
1141                         ip_vs_app_inc_put(svc->inc);
1142                         local_bh_enable();
1143                 }
1144                 kfree(svc);
1145         }
1146         ip_vs_scheduler_put(sched);
1147
1148   out_mod_dec:
1149         /* decrease the module use count */
1150         ip_vs_use_count_dec();
1151
1152         return ret;
1153 }
1154
1155
1156 /*
1157  *      Edit a service and bind it with a new scheduler
1158  */
1159 static int
1160 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user *u)
1161 {
1162         struct ip_vs_scheduler *sched, *old_sched;
1163         int ret = 0;
1164
1165         /*
1166          * Lookup the scheduler, by 'u->sched_name'
1167          */
1168         sched = ip_vs_scheduler_get(u->sched_name);
1169         if (sched == NULL) {
1170                 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1171                            u->sched_name);
1172                 return -ENOENT;
1173         }
1174         old_sched = sched;
1175
1176         write_lock_bh(&__ip_vs_svc_lock);
1177
1178         /*
1179          * Wait until all other svc users go away.
1180          */
1181         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1182
1183         /*
1184          * Set the flags and timeout value
1185          */
1186         svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1187         svc->timeout = u->timeout * HZ;
1188         svc->netmask = u->netmask;
1189
1190         old_sched = svc->scheduler;
1191         if (sched != old_sched) {
1192                 /*
1193                  * Unbind the old scheduler
1194                  */
1195                 if ((ret = ip_vs_unbind_scheduler(svc))) {
1196                         old_sched = sched;
1197                         goto out;
1198                 }
1199
1200                 /*
1201                  * Bind the new scheduler
1202                  */
1203                 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1204                         /*
1205                          * If ip_vs_bind_scheduler fails, restore the old
1206                          * scheduler.
1207                          * The main reason of failure is out of memory.
1208                          *
1209                          * The question is if the old scheduler can be
1210                          * restored all the time. TODO: if it cannot be
1211                          * restored some time, we must delete the service,
1212                          * otherwise the system may crash.
1213                          */
1214                         ip_vs_bind_scheduler(svc, old_sched);
1215                         old_sched = sched;
1216                         goto out;
1217                 }
1218         }
1219
1220   out:
1221         write_unlock_bh(&__ip_vs_svc_lock);
1222
1223         if (old_sched)
1224                 ip_vs_scheduler_put(old_sched);
1225
1226         return ret;
1227 }
1228
1229
1230 /*
1231  *      Delete a service from the service list
1232  *      - The service must be unlinked, unlocked and not referenced!
1233  *      - We are called under _bh lock
1234  */
1235 static void __ip_vs_del_service(struct ip_vs_service *svc)
1236 {
1237         struct ip_vs_dest *dest, *nxt;
1238         struct ip_vs_scheduler *old_sched;
1239
1240         ip_vs_num_services--;
1241         ip_vs_kill_estimator(&svc->stats);
1242
1243         /* Unbind scheduler */
1244         old_sched = svc->scheduler;
1245         ip_vs_unbind_scheduler(svc);
1246         if (old_sched)
1247                 ip_vs_scheduler_put(old_sched);
1248
1249         /* Unbind app inc */
1250         if (svc->inc) {
1251                 ip_vs_app_inc_put(svc->inc);
1252                 svc->inc = NULL;
1253         }
1254
1255         /*
1256          *    Unlink the whole destination list
1257          */
1258         list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1259                 __ip_vs_unlink_dest(svc, dest, 0);
1260                 __ip_vs_del_dest(dest);
1261         }
1262
1263         /*
1264          *    Update the virtual service counters
1265          */
1266         if (svc->port == FTPPORT)
1267                 atomic_dec(&ip_vs_ftpsvc_counter);
1268         else if (svc->port == 0)
1269                 atomic_dec(&ip_vs_nullsvc_counter);
1270
1271         /*
1272          *    Free the service if nobody refers to it
1273          */
1274         if (atomic_read(&svc->refcnt) == 0)
1275                 kfree(svc);
1276
1277         /* decrease the module use count */
1278         ip_vs_use_count_dec();
1279 }
1280
1281 /*
1282  *      Delete a service from the service list
1283  */
1284 static int ip_vs_del_service(struct ip_vs_service *svc)
1285 {
1286         if (svc == NULL)
1287                 return -EEXIST;
1288
1289         /*
1290          * Unhash it from the service table
1291          */
1292         write_lock_bh(&__ip_vs_svc_lock);
1293
1294         ip_vs_svc_unhash(svc);
1295
1296         /*
1297          * Wait until all the svc users go away.
1298          */
1299         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1300
1301         __ip_vs_del_service(svc);
1302
1303         write_unlock_bh(&__ip_vs_svc_lock);
1304
1305         return 0;
1306 }
1307
1308
1309 /*
1310  *      Flush all the virtual services
1311  */
1312 static int ip_vs_flush(void)
1313 {
1314         int idx;
1315         struct ip_vs_service *svc, *nxt;
1316
1317         /*
1318          * Flush the service table hashed by <protocol,addr,port>
1319          */
1320         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1321                 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1322                         write_lock_bh(&__ip_vs_svc_lock);
1323                         ip_vs_svc_unhash(svc);
1324                         /*
1325                          * Wait until all the svc users go away.
1326                          */
1327                         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1328                         __ip_vs_del_service(svc);
1329                         write_unlock_bh(&__ip_vs_svc_lock);
1330                 }
1331         }
1332
1333         /*
1334          * Flush the service table hashed by fwmark
1335          */
1336         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1337                 list_for_each_entry_safe(svc, nxt,
1338                                          &ip_vs_svc_fwm_table[idx], f_list) {
1339                         write_lock_bh(&__ip_vs_svc_lock);
1340                         ip_vs_svc_unhash(svc);
1341                         /*
1342                          * Wait until all the svc users go away.
1343                          */
1344                         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1345                         __ip_vs_del_service(svc);
1346                         write_unlock_bh(&__ip_vs_svc_lock);
1347                 }
1348         }
1349
1350         return 0;
1351 }
1352
1353
1354 /*
1355  *      Zero counters in a service or all services
1356  */
1357 static int ip_vs_zero_service(struct ip_vs_service *svc)
1358 {
1359         struct ip_vs_dest *dest;
1360
1361         write_lock_bh(&__ip_vs_svc_lock);
1362         list_for_each_entry(dest, &svc->destinations, n_list) {
1363                 ip_vs_zero_stats(&dest->stats);
1364         }
1365         ip_vs_zero_stats(&svc->stats);
1366         write_unlock_bh(&__ip_vs_svc_lock);
1367         return 0;
1368 }
1369
1370 static int ip_vs_zero_all(void)
1371 {
1372         int idx;
1373         struct ip_vs_service *svc;
1374
1375         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1376                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1377                         ip_vs_zero_service(svc);
1378                 }
1379         }
1380
1381         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1382                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1383                         ip_vs_zero_service(svc);
1384                 }
1385         }
1386
1387         ip_vs_zero_stats(&ip_vs_stats);
1388         return 0;
1389 }
1390
1391
1392 static int
1393 proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
1394                      void __user *buffer, size_t *lenp, loff_t *ppos)
1395 {
1396         int *valp = table->data;
1397         int val = *valp;
1398         int rc;
1399
1400         rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1401         if (write && (*valp != val)) {
1402                 if ((*valp < 0) || (*valp > 3)) {
1403                         /* Restore the correct value */
1404                         *valp = val;
1405                 } else {
1406                         update_defense_level();
1407                 }
1408         }
1409         return rc;
1410 }
1411
1412
1413 static int
1414 proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
1415                        void __user *buffer, size_t *lenp, loff_t *ppos)
1416 {
1417         int *valp = table->data;
1418         int val[2];
1419         int rc;
1420
1421         /* backup the value first */
1422         memcpy(val, valp, sizeof(val));
1423
1424         rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1425         if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1426                 /* Restore the correct value */
1427                 memcpy(valp, val, sizeof(val));
1428         }
1429         return rc;
1430 }
1431
1432
1433 /*
1434  *      IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1435  */
1436
1437 static struct ctl_table vs_vars[] = {
1438         {
1439                 .procname       = "amemthresh",
1440                 .data           = &sysctl_ip_vs_amemthresh,
1441                 .maxlen         = sizeof(int),
1442                 .mode           = 0644,
1443                 .proc_handler   = &proc_dointvec,
1444         },
1445 #ifdef CONFIG_IP_VS_DEBUG
1446         {
1447                 .procname       = "debug_level",
1448                 .data           = &sysctl_ip_vs_debug_level,
1449                 .maxlen         = sizeof(int),
1450                 .mode           = 0644,
1451                 .proc_handler   = &proc_dointvec,
1452         },
1453 #endif
1454         {
1455                 .procname       = "am_droprate",
1456                 .data           = &sysctl_ip_vs_am_droprate,
1457                 .maxlen         = sizeof(int),
1458                 .mode           = 0644,
1459                 .proc_handler   = &proc_dointvec,
1460         },
1461         {
1462                 .procname       = "drop_entry",
1463                 .data           = &sysctl_ip_vs_drop_entry,
1464                 .maxlen         = sizeof(int),
1465                 .mode           = 0644,
1466                 .proc_handler   = &proc_do_defense_mode,
1467         },
1468         {
1469                 .procname       = "drop_packet",
1470                 .data           = &sysctl_ip_vs_drop_packet,
1471                 .maxlen         = sizeof(int),
1472                 .mode           = 0644,
1473                 .proc_handler   = &proc_do_defense_mode,
1474         },
1475         {
1476                 .procname       = "secure_tcp",
1477                 .data           = &sysctl_ip_vs_secure_tcp,
1478                 .maxlen         = sizeof(int),
1479                 .mode           = 0644,
1480                 .proc_handler   = &proc_do_defense_mode,
1481         },
1482 #if 0
1483         {
1484                 .procname       = "timeout_established",
1485                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1486                 .maxlen         = sizeof(int),
1487                 .mode           = 0644,
1488                 .proc_handler   = &proc_dointvec_jiffies,
1489         },
1490         {
1491                 .procname       = "timeout_synsent",
1492                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1493                 .maxlen         = sizeof(int),
1494                 .mode           = 0644,
1495                 .proc_handler   = &proc_dointvec_jiffies,
1496         },
1497         {
1498                 .procname       = "timeout_synrecv",
1499                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1500                 .maxlen         = sizeof(int),
1501                 .mode           = 0644,
1502                 .proc_handler   = &proc_dointvec_jiffies,
1503         },
1504         {
1505                 .procname       = "timeout_finwait",
1506                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1507                 .maxlen         = sizeof(int),
1508                 .mode           = 0644,
1509                 .proc_handler   = &proc_dointvec_jiffies,
1510         },
1511         {
1512                 .procname       = "timeout_timewait",
1513                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1514                 .maxlen         = sizeof(int),
1515                 .mode           = 0644,
1516                 .proc_handler   = &proc_dointvec_jiffies,
1517         },
1518         {
1519                 .procname       = "timeout_close",
1520                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1521                 .maxlen         = sizeof(int),
1522                 .mode           = 0644,
1523                 .proc_handler   = &proc_dointvec_jiffies,
1524         },
1525         {
1526                 .procname       = "timeout_closewait",
1527                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1528                 .maxlen         = sizeof(int),
1529                 .mode           = 0644,
1530                 .proc_handler   = &proc_dointvec_jiffies,
1531         },
1532         {
1533                 .procname       = "timeout_lastack",
1534                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1535                 .maxlen         = sizeof(int),
1536                 .mode           = 0644,
1537                 .proc_handler   = &proc_dointvec_jiffies,
1538         },
1539         {
1540                 .procname       = "timeout_listen",
1541                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1542                 .maxlen         = sizeof(int),
1543                 .mode           = 0644,
1544                 .proc_handler   = &proc_dointvec_jiffies,
1545         },
1546         {
1547                 .procname       = "timeout_synack",
1548                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1549                 .maxlen         = sizeof(int),
1550                 .mode           = 0644,
1551                 .proc_handler   = &proc_dointvec_jiffies,
1552         },
1553         {
1554                 .procname       = "timeout_udp",
1555                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1556                 .maxlen         = sizeof(int),
1557                 .mode           = 0644,
1558                 .proc_handler   = &proc_dointvec_jiffies,
1559         },
1560         {
1561                 .procname       = "timeout_icmp",
1562                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1563                 .maxlen         = sizeof(int),
1564                 .mode           = 0644,
1565                 .proc_handler   = &proc_dointvec_jiffies,
1566         },
1567 #endif
1568         {
1569                 .procname       = "cache_bypass",
1570                 .data           = &sysctl_ip_vs_cache_bypass,
1571                 .maxlen         = sizeof(int),
1572                 .mode           = 0644,
1573                 .proc_handler   = &proc_dointvec,
1574         },
1575         {
1576                 .procname       = "expire_nodest_conn",
1577                 .data           = &sysctl_ip_vs_expire_nodest_conn,
1578                 .maxlen         = sizeof(int),
1579                 .mode           = 0644,
1580                 .proc_handler   = &proc_dointvec,
1581         },
1582         {
1583                 .procname       = "expire_quiescent_template",
1584                 .data           = &sysctl_ip_vs_expire_quiescent_template,
1585                 .maxlen         = sizeof(int),
1586                 .mode           = 0644,
1587                 .proc_handler   = &proc_dointvec,
1588         },
1589         {
1590                 .procname       = "sync_threshold",
1591                 .data           = &sysctl_ip_vs_sync_threshold,
1592                 .maxlen         = sizeof(sysctl_ip_vs_sync_threshold),
1593                 .mode           = 0644,
1594                 .proc_handler   = &proc_do_sync_threshold,
1595         },
1596         {
1597                 .procname       = "nat_icmp_send",
1598                 .data           = &sysctl_ip_vs_nat_icmp_send,
1599                 .maxlen         = sizeof(int),
1600                 .mode           = 0644,
1601                 .proc_handler   = &proc_dointvec,
1602         },
1603         { .ctl_name = 0 }
1604 };
1605
1606 const struct ctl_path net_vs_ctl_path[] = {
1607         { .procname = "net", .ctl_name = CTL_NET, },
1608         { .procname = "ipv4", .ctl_name = NET_IPV4, },
1609         { .procname = "vs", },
1610         { }
1611 };
1612 EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1613
1614 static struct ctl_table_header * sysctl_header;
1615
1616 #ifdef CONFIG_PROC_FS
1617
/*
 * Iteration state kept in seq_file->private while walking the services.
 */
struct ip_vs_iter {
	/* hash table being walked: ip_vs_svc_table or ip_vs_svc_fwm_table */
	struct list_head *table;
	/* index of the current bucket within that table */
	int bucket;
};
1622
1623 /*
1624  *      Write the contents of the VS rule table to a PROCfs file.
1625  *      (It is kept just for backward compatibility)
1626  */
1627 static inline const char *ip_vs_fwd_name(unsigned flags)
1628 {
1629         switch (flags & IP_VS_CONN_F_FWD_MASK) {
1630         case IP_VS_CONN_F_LOCALNODE:
1631                 return "Local";
1632         case IP_VS_CONN_F_TUNNEL:
1633                 return "Tunnel";
1634         case IP_VS_CONN_F_DROUTE:
1635                 return "Route";
1636         default:
1637                 return "Masq";
1638         }
1639 }
1640
1641
1642 /* Get the Nth entry in the two lists */
1643 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1644 {
1645         struct ip_vs_iter *iter = seq->private;
1646         int idx;
1647         struct ip_vs_service *svc;
1648
1649         /* look in hash by protocol */
1650         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1651                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1652                         if (pos-- == 0){
1653                                 iter->table = ip_vs_svc_table;
1654                                 iter->bucket = idx;
1655                                 return svc;
1656                         }
1657                 }
1658         }
1659
1660         /* keep looking in fwmark */
1661         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1662                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1663                         if (pos-- == 0) {
1664                                 iter->table = ip_vs_svc_fwm_table;
1665                                 iter->bucket = idx;
1666                                 return svc;
1667                         }
1668                 }
1669         }
1670
1671         return NULL;
1672 }
1673
1674 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1675 {
1676
1677         read_lock_bh(&__ip_vs_svc_lock);
1678         return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1679 }
1680
1681
1682 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1683 {
1684         struct list_head *e;
1685         struct ip_vs_iter *iter;
1686         struct ip_vs_service *svc;
1687
1688         ++*pos;
1689         if (v == SEQ_START_TOKEN)
1690                 return ip_vs_info_array(seq,0);
1691
1692         svc = v;
1693         iter = seq->private;
1694
1695         if (iter->table == ip_vs_svc_table) {
1696                 /* next service in table hashed by protocol */
1697                 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1698                         return list_entry(e, struct ip_vs_service, s_list);
1699
1700
1701                 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1702                         list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1703                                             s_list) {
1704                                 return svc;
1705                         }
1706                 }
1707
1708                 iter->table = ip_vs_svc_fwm_table;
1709                 iter->bucket = -1;
1710                 goto scan_fwmark;
1711         }
1712
1713         /* next service in hashed by fwmark */
1714         if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1715                 return list_entry(e, struct ip_vs_service, f_list);
1716
1717  scan_fwmark:
1718         while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1719                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1720                                     f_list)
1721                         return svc;
1722         }
1723
1724         return NULL;
1725 }
1726
1727 static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1728 {
1729         read_unlock_bh(&__ip_vs_svc_lock);
1730 }
1731
1732
1733 static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1734 {
1735         if (v == SEQ_START_TOKEN) {
1736                 seq_printf(seq,
1737                         "IP Virtual Server version %d.%d.%d (size=%d)\n",
1738                         NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
1739                 seq_puts(seq,
1740                          "Prot LocalAddress:Port Scheduler Flags\n");
1741                 seq_puts(seq,
1742                          "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1743         } else {
1744                 const struct ip_vs_service *svc = v;
1745                 const struct ip_vs_iter *iter = seq->private;
1746                 const struct ip_vs_dest *dest;
1747
1748                 if (iter->table == ip_vs_svc_table)
1749                         seq_printf(seq, "%s  %08X:%04X %s ",
1750                                    ip_vs_proto_name(svc->protocol),
1751                                    ntohl(svc->addr),
1752                                    ntohs(svc->port),
1753                                    svc->scheduler->name);
1754                 else
1755                         seq_printf(seq, "FWM  %08X %s ",
1756                                    svc->fwmark, svc->scheduler->name);
1757
1758                 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1759                         seq_printf(seq, "persistent %d %08X\n",
1760                                 svc->timeout,
1761                                 ntohl(svc->netmask));
1762                 else
1763                         seq_putc(seq, '\n');
1764
1765                 list_for_each_entry(dest, &svc->destinations, n_list) {
1766                         seq_printf(seq,
1767                                    "  -> %08X:%04X      %-7s %-6d %-10d %-10d\n",
1768                                    ntohl(dest->addr), ntohs(dest->port),
1769                                    ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1770                                    atomic_read(&dest->weight),
1771                                    atomic_read(&dest->activeconns),
1772                                    atomic_read(&dest->inactconns));
1773                 }
1774         }
1775         return 0;
1776 }
1777
1778 static const struct seq_operations ip_vs_info_seq_ops = {
1779         .start = ip_vs_info_seq_start,
1780         .next  = ip_vs_info_seq_next,
1781         .stop  = ip_vs_info_seq_stop,
1782         .show  = ip_vs_info_seq_show,
1783 };
1784
1785 static int ip_vs_info_open(struct inode *inode, struct file *file)
1786 {
1787         return seq_open_private(file, &ip_vs_info_seq_ops,
1788                         sizeof(struct ip_vs_iter));
1789 }
1790
1791 static const struct file_operations ip_vs_info_fops = {
1792         .owner   = THIS_MODULE,
1793         .open    = ip_vs_info_open,
1794         .read    = seq_read,
1795         .llseek  = seq_lseek,
1796         .release = seq_release_private,
1797 };
1798
1799 #endif
1800
1801 struct ip_vs_stats ip_vs_stats = {
1802         .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
1803 };
1804
1805 #ifdef CONFIG_PROC_FS
1806 static int ip_vs_stats_show(struct seq_file *seq, void *v)
1807 {
1808
1809 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
1810         seq_puts(seq,
1811                  "   Total Incoming Outgoing         Incoming         Outgoing\n");
1812         seq_printf(seq,
1813                    "   Conns  Packets  Packets            Bytes            Bytes\n");
1814
1815         spin_lock_bh(&ip_vs_stats.lock);
1816         seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.conns,
1817                    ip_vs_stats.inpkts, ip_vs_stats.outpkts,
1818                    (unsigned long long) ip_vs_stats.inbytes,
1819                    (unsigned long long) ip_vs_stats.outbytes);
1820
1821 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1822         seq_puts(seq,
1823                    " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
1824         seq_printf(seq,"%8X %8X %8X %16X %16X\n",
1825                         ip_vs_stats.cps,
1826                         ip_vs_stats.inpps,
1827                         ip_vs_stats.outpps,
1828                         ip_vs_stats.inbps,
1829                         ip_vs_stats.outbps);
1830         spin_unlock_bh(&ip_vs_stats.lock);
1831
1832         return 0;
1833 }
1834
1835 static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1836 {
1837         return single_open(file, ip_vs_stats_show, NULL);
1838 }
1839
1840 static const struct file_operations ip_vs_stats_fops = {
1841         .owner = THIS_MODULE,
1842         .open = ip_vs_stats_seq_open,
1843         .read = seq_read,
1844         .llseek = seq_lseek,
1845         .release = single_release,
1846 };
1847
1848 #endif
1849
1850 /*
1851  *      Set timeout values for tcp tcpfin udp in the timeout_table.
1852  */
1853 static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
1854 {
1855         IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
1856                   u->tcp_timeout,
1857                   u->tcp_fin_timeout,
1858                   u->udp_timeout);
1859
1860 #ifdef CONFIG_IP_VS_PROTO_TCP
1861         if (u->tcp_timeout) {
1862                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
1863                         = u->tcp_timeout * HZ;
1864         }
1865
1866         if (u->tcp_fin_timeout) {
1867                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
1868                         = u->tcp_fin_timeout * HZ;
1869         }
1870 #endif
1871
1872 #ifdef CONFIG_IP_VS_PROTO_UDP
1873         if (u->udp_timeout) {
1874                 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
1875                         = u->udp_timeout * HZ;
1876         }
1877 #endif
1878         return 0;
1879 }
1880
1881
1882 #define SET_CMDID(cmd)          (cmd - IP_VS_BASE_CTL)
1883 #define SERVICE_ARG_LEN         (sizeof(struct ip_vs_service_user))
1884 #define SVCDEST_ARG_LEN         (sizeof(struct ip_vs_service_user) +    \
1885                                  sizeof(struct ip_vs_dest_user))
1886 #define TIMEOUT_ARG_LEN         (sizeof(struct ip_vs_timeout_user))
1887 #define DAEMON_ARG_LEN          (sizeof(struct ip_vs_daemon_user))
1888 #define MAX_ARG_LEN             SVCDEST_ARG_LEN
1889
1890 static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
1891         [SET_CMDID(IP_VS_SO_SET_ADD)]           = SERVICE_ARG_LEN,
1892         [SET_CMDID(IP_VS_SO_SET_EDIT)]          = SERVICE_ARG_LEN,
1893         [SET_CMDID(IP_VS_SO_SET_DEL)]           = SERVICE_ARG_LEN,
1894         [SET_CMDID(IP_VS_SO_SET_FLUSH)]         = 0,
1895         [SET_CMDID(IP_VS_SO_SET_ADDDEST)]       = SVCDEST_ARG_LEN,
1896         [SET_CMDID(IP_VS_SO_SET_DELDEST)]       = SVCDEST_ARG_LEN,
1897         [SET_CMDID(IP_VS_SO_SET_EDITDEST)]      = SVCDEST_ARG_LEN,
1898         [SET_CMDID(IP_VS_SO_SET_TIMEOUT)]       = TIMEOUT_ARG_LEN,
1899         [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)]   = DAEMON_ARG_LEN,
1900         [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)]    = DAEMON_ARG_LEN,
1901         [SET_CMDID(IP_VS_SO_SET_ZERO)]          = SERVICE_ARG_LEN,
1902 };
1903
1904 static int
1905 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1906 {
1907         int ret;
1908         unsigned char arg[MAX_ARG_LEN];
1909         struct ip_vs_service_user *usvc;
1910         struct ip_vs_service *svc;
1911         struct ip_vs_dest_user *udest;
1912
1913         if (!capable(CAP_NET_ADMIN))
1914                 return -EPERM;
1915
1916         if (len != set_arglen[SET_CMDID(cmd)]) {
1917                 IP_VS_ERR("set_ctl: len %u != %u\n",
1918                           len, set_arglen[SET_CMDID(cmd)]);
1919                 return -EINVAL;
1920         }
1921
1922         if (copy_from_user(arg, user, len) != 0)
1923                 return -EFAULT;
1924
1925         /* increase the module use count */
1926         ip_vs_use_count_inc();
1927
1928         if (mutex_lock_interruptible(&__ip_vs_mutex)) {
1929                 ret = -ERESTARTSYS;
1930                 goto out_dec;
1931         }
1932
1933         if (cmd == IP_VS_SO_SET_FLUSH) {
1934                 /* Flush the virtual service */
1935                 ret = ip_vs_flush();
1936                 goto out_unlock;
1937         } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
1938                 /* Set timeout values for (tcp tcpfin udp) */
1939                 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
1940                 goto out_unlock;
1941         } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
1942                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
1943                 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
1944                 goto out_unlock;
1945         } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
1946                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
1947                 ret = stop_sync_thread(dm->state);
1948                 goto out_unlock;
1949         }
1950
1951         usvc = (struct ip_vs_service_user *)arg;
1952         udest = (struct ip_vs_dest_user *)(usvc + 1);
1953
1954         if (cmd == IP_VS_SO_SET_ZERO) {
1955                 /* if no service address is set, zero counters in all */
1956                 if (!usvc->fwmark && !usvc->addr && !usvc->port) {
1957                         ret = ip_vs_zero_all();
1958                         goto out_unlock;
1959                 }
1960         }
1961
1962         /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
1963         if (usvc->protocol!=IPPROTO_TCP && usvc->protocol!=IPPROTO_UDP) {
1964                 IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
1965                           usvc->protocol, NIPQUAD(usvc->addr),
1966                           ntohs(usvc->port), usvc->sched_name);
1967                 ret = -EFAULT;
1968                 goto out_unlock;
1969         }
1970
1971         /* Lookup the exact service by <protocol, addr, port> or fwmark */
1972         if (usvc->fwmark == 0)
1973                 svc = __ip_vs_service_get(usvc->protocol,
1974                                           usvc->addr, usvc->port);
1975         else
1976                 svc = __ip_vs_svc_fwm_get(usvc->fwmark);
1977
1978         if (cmd != IP_VS_SO_SET_ADD
1979             && (svc == NULL || svc->protocol != usvc->protocol)) {
1980                 ret = -ESRCH;
1981                 goto out_unlock;
1982         }
1983
1984         switch (cmd) {
1985         case IP_VS_SO_SET_ADD:
1986                 if (svc != NULL)
1987                         ret = -EEXIST;
1988                 else
1989                         ret = ip_vs_add_service(usvc, &svc);
1990                 break;
1991         case IP_VS_SO_SET_EDIT:
1992                 ret = ip_vs_edit_service(svc, usvc);
1993                 break;
1994         case IP_VS_SO_SET_DEL:
1995                 ret = ip_vs_del_service(svc);
1996                 if (!ret)
1997                         goto out_unlock;
1998                 break;
1999         case IP_VS_SO_SET_ZERO:
2000                 ret = ip_vs_zero_service(svc);
2001                 break;
2002         case IP_VS_SO_SET_ADDDEST:
2003                 ret = ip_vs_add_dest(svc, udest);
2004                 break;
2005         case IP_VS_SO_SET_EDITDEST:
2006                 ret = ip_vs_edit_dest(svc, udest);
2007                 break;
2008         case IP_VS_SO_SET_DELDEST:
2009                 ret = ip_vs_del_dest(svc, udest);
2010                 break;
2011         default:
2012                 ret = -EINVAL;
2013         }
2014
2015         if (svc)
2016                 ip_vs_service_put(svc);
2017
2018   out_unlock:
2019         mutex_unlock(&__ip_vs_mutex);
2020   out_dec:
2021         /* decrease the module use count */
2022         ip_vs_use_count_dec();
2023
2024         return ret;
2025 }
2026
2027
2028 static void
2029 ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2030 {
2031         spin_lock_bh(&src->lock);
2032         memcpy(dst, src, (char*)&src->lock - (char*)src);
2033         spin_unlock_bh(&src->lock);
2034 }
2035
2036 static void
2037 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2038 {
2039         dst->protocol = src->protocol;
2040         dst->addr = src->addr;
2041         dst->port = src->port;
2042         dst->fwmark = src->fwmark;
2043         strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2044         dst->flags = src->flags;
2045         dst->timeout = src->timeout / HZ;
2046         dst->netmask = src->netmask;
2047         dst->num_dests = src->num_dests;
2048         ip_vs_copy_stats(&dst->stats, &src->stats);
2049 }
2050
2051 static inline int
2052 __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2053                             struct ip_vs_get_services __user *uptr)
2054 {
2055         int idx, count=0;
2056         struct ip_vs_service *svc;
2057         struct ip_vs_service_entry entry;
2058         int ret = 0;
2059
2060         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2061                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2062                         if (count >= get->num_services)
2063                                 goto out;
2064                         memset(&entry, 0, sizeof(entry));
2065                         ip_vs_copy_service(&entry, svc);
2066                         if (copy_to_user(&uptr->entrytable[count],
2067                                          &entry, sizeof(entry))) {
2068                                 ret = -EFAULT;
2069                                 goto out;
2070                         }
2071                         count++;
2072                 }
2073         }
2074
2075         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2076                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2077                         if (count >= get->num_services)
2078                                 goto out;
2079                         memset(&entry, 0, sizeof(entry));
2080                         ip_vs_copy_service(&entry, svc);
2081                         if (copy_to_user(&uptr->entrytable[count],
2082                                          &entry, sizeof(entry))) {
2083                                 ret = -EFAULT;
2084                                 goto out;
2085                         }
2086                         count++;
2087                 }
2088         }
2089   out:
2090         return ret;
2091 }
2092
2093 static inline int
2094 __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2095                          struct ip_vs_get_dests __user *uptr)
2096 {
2097         struct ip_vs_service *svc;
2098         int ret = 0;
2099
2100         if (get->fwmark)
2101                 svc = __ip_vs_svc_fwm_get(get->fwmark);
2102         else
2103                 svc = __ip_vs_service_get(get->protocol,
2104                                           get->addr, get->port);
2105         if (svc) {
2106                 int count = 0;
2107                 struct ip_vs_dest *dest;
2108                 struct ip_vs_dest_entry entry;
2109
2110                 list_for_each_entry(dest, &svc->destinations, n_list) {
2111                         if (count >= get->num_dests)
2112                                 break;
2113
2114                         entry.addr = dest->addr;
2115                         entry.port = dest->port;
2116                         entry.conn_flags = atomic_read(&dest->conn_flags);
2117                         entry.weight = atomic_read(&dest->weight);
2118                         entry.u_threshold = dest->u_threshold;
2119                         entry.l_threshold = dest->l_threshold;
2120                         entry.activeconns = atomic_read(&dest->activeconns);
2121                         entry.inactconns = atomic_read(&dest->inactconns);
2122                         entry.persistconns = atomic_read(&dest->persistconns);
2123                         ip_vs_copy_stats(&entry.stats, &dest->stats);
2124                         if (copy_to_user(&uptr->entrytable[count],
2125                                          &entry, sizeof(entry))) {
2126                                 ret = -EFAULT;
2127                                 break;
2128                         }
2129                         count++;
2130                 }
2131                 ip_vs_service_put(svc);
2132         } else
2133                 ret = -ESRCH;
2134         return ret;
2135 }
2136
2137 static inline void
2138 __ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2139 {
2140 #ifdef CONFIG_IP_VS_PROTO_TCP
2141         u->tcp_timeout =
2142                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2143         u->tcp_fin_timeout =
2144                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2145 #endif
2146 #ifdef CONFIG_IP_VS_PROTO_UDP
2147         u->udp_timeout =
2148                 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2149 #endif
2150 }
2151
2152
2153 #define GET_CMDID(cmd)          (cmd - IP_VS_BASE_CTL)
2154 #define GET_INFO_ARG_LEN        (sizeof(struct ip_vs_getinfo))
2155 #define GET_SERVICES_ARG_LEN    (sizeof(struct ip_vs_get_services))
2156 #define GET_SERVICE_ARG_LEN     (sizeof(struct ip_vs_service_entry))
2157 #define GET_DESTS_ARG_LEN       (sizeof(struct ip_vs_get_dests))
2158 #define GET_TIMEOUT_ARG_LEN     (sizeof(struct ip_vs_timeout_user))
2159 #define GET_DAEMON_ARG_LEN      (sizeof(struct ip_vs_daemon_user) * 2)
2160
2161 static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
2162         [GET_CMDID(IP_VS_SO_GET_VERSION)]       = 64,
2163         [GET_CMDID(IP_VS_SO_GET_INFO)]          = GET_INFO_ARG_LEN,
2164         [GET_CMDID(IP_VS_SO_GET_SERVICES)]      = GET_SERVICES_ARG_LEN,
2165         [GET_CMDID(IP_VS_SO_GET_SERVICE)]       = GET_SERVICE_ARG_LEN,
2166         [GET_CMDID(IP_VS_SO_GET_DESTS)]         = GET_DESTS_ARG_LEN,
2167         [GET_CMDID(IP_VS_SO_GET_TIMEOUT)]       = GET_TIMEOUT_ARG_LEN,
2168         [GET_CMDID(IP_VS_SO_GET_DAEMON)]        = GET_DAEMON_ARG_LEN,
2169 };
2170
2171 static int
2172 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2173 {
2174         unsigned char arg[128];
2175         int ret = 0;
2176
2177         if (!capable(CAP_NET_ADMIN))
2178                 return -EPERM;
2179
2180         if (*len < get_arglen[GET_CMDID(cmd)]) {
2181                 IP_VS_ERR("get_ctl: len %u < %u\n",
2182                           *len, get_arglen[GET_CMDID(cmd)]);
2183                 return -EINVAL;
2184         }
2185
2186         if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0)
2187                 return -EFAULT;
2188
2189         if (mutex_lock_interruptible(&__ip_vs_mutex))
2190                 return -ERESTARTSYS;
2191
2192         switch (cmd) {
2193         case IP_VS_SO_GET_VERSION:
2194         {
2195                 char buf[64];
2196
2197                 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2198                         NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
2199                 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2200                         ret = -EFAULT;
2201                         goto out;
2202                 }
2203                 *len = strlen(buf)+1;
2204         }
2205         break;
2206
2207         case IP_VS_SO_GET_INFO:
2208         {
2209                 struct ip_vs_getinfo info;
2210                 info.version = IP_VS_VERSION_CODE;
2211                 info.size = IP_VS_CONN_TAB_SIZE;
2212                 info.num_services = ip_vs_num_services;
2213                 if (copy_to_user(user, &info, sizeof(info)) != 0)
2214                         ret = -EFAULT;
2215         }
2216         break;
2217
2218         case IP_VS_SO_GET_SERVICES:
2219         {
2220                 struct ip_vs_get_services *get;
2221                 int size;
2222
2223                 get = (struct ip_vs_get_services *)arg;
2224                 size = sizeof(*get) +
2225                         sizeof(struct ip_vs_service_entry) * get->num_services;
2226                 if (*len != size) {
2227                         IP_VS_ERR("length: %u != %u\n", *len, size);
2228                         ret = -EINVAL;
2229                         goto out;
2230                 }
2231                 ret = __ip_vs_get_service_entries(get, user);
2232         }
2233         break;
2234
2235         case IP_VS_SO_GET_SERVICE:
2236         {
2237                 struct ip_vs_service_entry *entry;
2238                 struct ip_vs_service *svc;
2239
2240                 entry = (struct ip_vs_service_entry *)arg;
2241                 if (entry->fwmark)
2242                         svc = __ip_vs_svc_fwm_get(entry->fwmark);
2243                 else
2244                         svc = __ip_vs_service_get(entry->protocol,
2245                                                   entry->addr, entry->port);
2246                 if (svc) {
2247                         ip_vs_copy_service(entry, svc);
2248                         if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2249                                 ret = -EFAULT;
2250                         ip_vs_service_put(svc);
2251                 } else
2252                         ret = -ESRCH;
2253         }
2254         break;
2255
2256         case IP_VS_SO_GET_DESTS:
2257         {
2258                 struct ip_vs_get_dests *get;
2259                 int size;
2260
2261                 get = (struct ip_vs_get_dests *)arg;
2262                 size = sizeof(*get) +
2263                         sizeof(struct ip_vs_dest_entry) * get->num_dests;
2264                 if (*len != size) {
2265                         IP_VS_ERR("length: %u != %u\n", *len, size);
2266                         ret = -EINVAL;
2267                         goto out;
2268                 }
2269                 ret = __ip_vs_get_dest_entries(get, user);
2270         }
2271         break;
2272
2273         case IP_VS_SO_GET_TIMEOUT:
2274         {
2275                 struct ip_vs_timeout_user t;
2276
2277                 __ip_vs_get_timeouts(&t);
2278                 if (copy_to_user(user, &t, sizeof(t)) != 0)
2279                         ret = -EFAULT;
2280         }
2281         break;
2282
2283         case IP_VS_SO_GET_DAEMON:
2284         {
2285                 struct ip_vs_daemon_user d[2];
2286
2287                 memset(&d, 0, sizeof(d));
2288                 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2289                         d[0].state = IP_VS_STATE_MASTER;
2290                         strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
2291                         d[0].syncid = ip_vs_master_syncid;
2292                 }
2293                 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2294                         d[1].state = IP_VS_STATE_BACKUP;
2295                         strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
2296                         d[1].syncid = ip_vs_backup_syncid;
2297                 }
2298                 if (copy_to_user(user, &d, sizeof(d)) != 0)
2299                         ret = -EFAULT;
2300         }
2301         break;
2302
2303         default:
2304                 ret = -EINVAL;
2305         }
2306
2307   out:
2308         mutex_unlock(&__ip_vs_mutex);
2309         return ret;
2310 }
2311
2312
2313 static struct nf_sockopt_ops ip_vs_sockopts = {
2314         .pf             = PF_INET,
2315         .set_optmin     = IP_VS_BASE_CTL,
2316         .set_optmax     = IP_VS_SO_SET_MAX+1,
2317         .set            = do_ip_vs_set_ctl,
2318         .get_optmin     = IP_VS_BASE_CTL,
2319         .get_optmax     = IP_VS_SO_GET_MAX+1,
2320         .get            = do_ip_vs_get_ctl,
2321         .owner          = THIS_MODULE,
2322 };
2323
2324 /*
2325  * Generic Netlink interface
2326  */
2327
2328 /* IPVS genetlink family */
2329 static struct genl_family ip_vs_genl_family = {
2330         .id             = GENL_ID_GENERATE,
2331         .hdrsize        = 0,
2332         .name           = IPVS_GENL_NAME,
2333         .version        = IPVS_GENL_VERSION,
2334         .maxattr        = IPVS_CMD_MAX,
2335 };
2336
2337 /* Policy used for first-level command attributes */
2338 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2339         [IPVS_CMD_ATTR_SERVICE]         = { .type = NLA_NESTED },
2340         [IPVS_CMD_ATTR_DEST]            = { .type = NLA_NESTED },
2341         [IPVS_CMD_ATTR_DAEMON]          = { .type = NLA_NESTED },
2342         [IPVS_CMD_ATTR_TIMEOUT_TCP]     = { .type = NLA_U32 },
2343         [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2344         [IPVS_CMD_ATTR_TIMEOUT_UDP]     = { .type = NLA_U32 },
2345 };
2346
2347 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2348 static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2349         [IPVS_DAEMON_ATTR_STATE]        = { .type = NLA_U32 },
2350         [IPVS_DAEMON_ATTR_MCAST_IFN]    = { .type = NLA_NUL_STRING,
2351                                             .len = IP_VS_IFNAME_MAXLEN },
2352         [IPVS_DAEMON_ATTR_SYNC_ID]      = { .type = NLA_U32 },
2353 };
2354
2355 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2356 static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2357         [IPVS_SVC_ATTR_AF]              = { .type = NLA_U16 },
2358         [IPVS_SVC_ATTR_PROTOCOL]        = { .type = NLA_U16 },
2359         [IPVS_SVC_ATTR_ADDR]            = { .type = NLA_BINARY,
2360                                             .len = sizeof(union nf_inet_addr) },
2361         [IPVS_SVC_ATTR_PORT]            = { .type = NLA_U16 },
2362         [IPVS_SVC_ATTR_FWMARK]          = { .type = NLA_U32 },
2363         [IPVS_SVC_ATTR_SCHED_NAME]      = { .type = NLA_NUL_STRING,
2364                                             .len = IP_VS_SCHEDNAME_MAXLEN },
2365         [IPVS_SVC_ATTR_FLAGS]           = { .type = NLA_BINARY,
2366                                             .len = sizeof(struct ip_vs_flags) },
2367         [IPVS_SVC_ATTR_TIMEOUT]         = { .type = NLA_U32 },
2368         [IPVS_SVC_ATTR_NETMASK]         = { .type = NLA_U32 },
2369         [IPVS_SVC_ATTR_STATS]           = { .type = NLA_NESTED },
2370 };
2371
2372 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2373 static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2374         [IPVS_DEST_ATTR_ADDR]           = { .type = NLA_BINARY,
2375                                             .len = sizeof(union nf_inet_addr) },
2376         [IPVS_DEST_ATTR_PORT]           = { .type = NLA_U16 },
2377         [IPVS_DEST_ATTR_FWD_METHOD]     = { .type = NLA_U32 },
2378         [IPVS_DEST_ATTR_WEIGHT]         = { .type = NLA_U32 },
2379         [IPVS_DEST_ATTR_U_THRESH]       = { .type = NLA_U32 },
2380         [IPVS_DEST_ATTR_L_THRESH]       = { .type = NLA_U32 },
2381         [IPVS_DEST_ATTR_ACTIVE_CONNS]   = { .type = NLA_U32 },
2382         [IPVS_DEST_ATTR_INACT_CONNS]    = { .type = NLA_U32 },
2383         [IPVS_DEST_ATTR_PERSIST_CONNS]  = { .type = NLA_U32 },
2384         [IPVS_DEST_ATTR_STATS]          = { .type = NLA_NESTED },
2385 };
2386
2387 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2388                                  struct ip_vs_stats *stats)
2389 {
2390         struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2391         if (!nl_stats)
2392                 return -EMSGSIZE;
2393
2394         spin_lock_bh(&stats->lock);
2395
2396         NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->conns);
2397         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->inpkts);
2398         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->outpkts);
2399         NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->inbytes);
2400         NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->outbytes);
2401         NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->cps);
2402         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->inpps);
2403         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->outpps);
2404         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->inbps);
2405         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->outbps);
2406
2407         spin_unlock_bh(&stats->lock);
2408
2409         nla_nest_end(skb, nl_stats);
2410
2411         return 0;
2412
2413 nla_put_failure:
2414         spin_unlock_bh(&stats->lock);
2415         nla_nest_cancel(skb, nl_stats);
2416         return -EMSGSIZE;
2417 }
2418
2419 static int ip_vs_genl_fill_service(struct sk_buff *skb,
2420                                    struct ip_vs_service *svc)
2421 {
2422         struct nlattr *nl_service;
2423         struct ip_vs_flags flags = { .flags = svc->flags,
2424                                      .mask = ~0 };
2425
2426         nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2427         if (!nl_service)
2428                 return -EMSGSIZE;
2429
2430         NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, AF_INET);
2431
2432         if (svc->fwmark) {
2433                 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2434         } else {
2435                 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2436                 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2437                 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2438         }
2439
2440         NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2441         NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2442         NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2443         NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2444
2445         if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2446                 goto nla_put_failure;
2447
2448         nla_nest_end(skb, nl_service);
2449
2450         return 0;
2451
2452 nla_put_failure:
2453         nla_nest_cancel(skb, nl_service);
2454         return -EMSGSIZE;
2455 }
2456
2457 static int ip_vs_genl_dump_service(struct sk_buff *skb,
2458                                    struct ip_vs_service *svc,
2459                                    struct netlink_callback *cb)
2460 {
2461         void *hdr;
2462
2463         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2464                           &ip_vs_genl_family, NLM_F_MULTI,
2465                           IPVS_CMD_NEW_SERVICE);
2466         if (!hdr)
2467                 return -EMSGSIZE;
2468
2469         if (ip_vs_genl_fill_service(skb, svc) < 0)
2470                 goto nla_put_failure;
2471
2472         return genlmsg_end(skb, hdr);
2473
2474 nla_put_failure:
2475         genlmsg_cancel(skb, hdr);
2476         return -EMSGSIZE;
2477 }
2478
2479 static int ip_vs_genl_dump_services(struct sk_buff *skb,
2480                                     struct netlink_callback *cb)
2481 {
2482         int idx = 0, i;
2483         int start = cb->args[0];
2484         struct ip_vs_service *svc;
2485
2486         mutex_lock(&__ip_vs_mutex);
2487         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2488                 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2489                         if (++idx <= start)
2490                                 continue;
2491                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2492                                 idx--;
2493                                 goto nla_put_failure;
2494                         }
2495                 }
2496         }
2497
2498         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2499                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2500                         if (++idx <= start)
2501                                 continue;
2502                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2503                                 idx--;
2504                                 goto nla_put_failure;
2505                         }
2506                 }
2507         }
2508
2509 nla_put_failure:
2510         mutex_unlock(&__ip_vs_mutex);
2511         cb->args[0] = idx;
2512
2513         return skb->len;
2514 }
2515
2516 static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc,
2517                                     struct nlattr *nla, int full_entry)
2518 {
2519         struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2520         struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2521
2522         /* Parse mandatory identifying service fields first */
2523         if (nla == NULL ||
2524             nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2525                 return -EINVAL;
2526
2527         nla_af          = attrs[IPVS_SVC_ATTR_AF];
2528         nla_protocol    = attrs[IPVS_SVC_ATTR_PROTOCOL];
2529         nla_addr        = attrs[IPVS_SVC_ATTR_ADDR];
2530         nla_port        = attrs[IPVS_SVC_ATTR_PORT];
2531         nla_fwmark      = attrs[IPVS_SVC_ATTR_FWMARK];
2532
2533         if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2534                 return -EINVAL;
2535
2536         /* For now, only support IPv4 */
2537         if (nla_get_u16(nla_af) != AF_INET)
2538                 return -EAFNOSUPPORT;
2539
2540         if (nla_fwmark) {
2541                 usvc->protocol = IPPROTO_TCP;
2542                 usvc->fwmark = nla_get_u32(nla_fwmark);
2543         } else {
2544                 usvc->protocol = nla_get_u16(nla_protocol);
2545                 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2546                 usvc->port = nla_get_u16(nla_port);
2547                 usvc->fwmark = 0;
2548         }
2549
2550         /* If a full entry was requested, check for the additional fields */
2551         if (full_entry) {
2552                 struct nlattr *nla_sched, *nla_flags, *nla_timeout,
2553                               *nla_netmask;
2554                 struct ip_vs_flags flags;
2555                 struct ip_vs_service *svc;
2556
2557                 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2558                 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2559                 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2560                 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2561
2562                 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2563                         return -EINVAL;
2564
2565                 nla_memcpy(&flags, nla_flags, sizeof(flags));
2566
2567                 /* prefill flags from service if it already exists */
2568                 if (usvc->fwmark)
2569                         svc = __ip_vs_svc_fwm_get(usvc->fwmark);
2570                 else
2571                         svc = __ip_vs_service_get(usvc->protocol, usvc->addr,
2572                                                   usvc->port);
2573                 if (svc) {
2574                         usvc->flags = svc->flags;
2575                         ip_vs_service_put(svc);
2576                 } else
2577                         usvc->flags = 0;
2578
2579                 /* set new flags from userland */
2580                 usvc->flags = (usvc->flags & ~flags.mask) |
2581                               (flags.flags & flags.mask);
2582
2583                 strlcpy(usvc->sched_name, nla_data(nla_sched),
2584                         sizeof(usvc->sched_name));
2585                 usvc->timeout = nla_get_u32(nla_timeout);
2586                 usvc->netmask = nla_get_u32(nla_netmask);
2587         }
2588
2589         return 0;
2590 }
2591
2592 static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2593 {
2594         struct ip_vs_service_user usvc;
2595         int ret;
2596
2597         ret = ip_vs_genl_parse_service(&usvc, nla, 0);
2598         if (ret)
2599                 return ERR_PTR(ret);
2600
2601         if (usvc.fwmark)
2602                 return __ip_vs_svc_fwm_get(usvc.fwmark);
2603         else
2604                 return __ip_vs_service_get(usvc.protocol, usvc.addr,
2605                                            usvc.port);
2606 }
2607
2608 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2609 {
2610         struct nlattr *nl_dest;
2611
2612         nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2613         if (!nl_dest)
2614                 return -EMSGSIZE;
2615
2616         NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2617         NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2618
2619         NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2620                     atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2621         NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2622         NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2623         NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2624         NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2625                     atomic_read(&dest->activeconns));
2626         NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2627                     atomic_read(&dest->inactconns));
2628         NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2629                     atomic_read(&dest->persistconns));
2630
2631         if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2632                 goto nla_put_failure;
2633
2634         nla_nest_end(skb, nl_dest);
2635
2636         return 0;
2637
2638 nla_put_failure:
2639         nla_nest_cancel(skb, nl_dest);
2640         return -EMSGSIZE;
2641 }
2642
2643 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2644                                 struct netlink_callback *cb)
2645 {
2646         void *hdr;
2647
2648         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2649                           &ip_vs_genl_family, NLM_F_MULTI,
2650                           IPVS_CMD_NEW_DEST);
2651         if (!hdr)
2652                 return -EMSGSIZE;
2653
2654         if (ip_vs_genl_fill_dest(skb, dest) < 0)
2655                 goto nla_put_failure;
2656
2657         return genlmsg_end(skb, hdr);
2658
2659 nla_put_failure:
2660         genlmsg_cancel(skb, hdr);
2661         return -EMSGSIZE;
2662 }
2663
2664 static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2665                                  struct netlink_callback *cb)
2666 {
2667         int idx = 0;
2668         int start = cb->args[0];
2669         struct ip_vs_service *svc;
2670         struct ip_vs_dest *dest;
2671         struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2672
2673         mutex_lock(&__ip_vs_mutex);
2674
2675         /* Try to find the service for which to dump destinations */
2676         if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2677                         IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2678                 goto out_err;
2679
2680         svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
2681         if (IS_ERR(svc) || svc == NULL)
2682                 goto out_err;
2683
2684         /* Dump the destinations */
2685         list_for_each_entry(dest, &svc->destinations, n_list) {
2686                 if (++idx <= start)
2687                         continue;
2688                 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2689                         idx--;
2690                         goto nla_put_failure;
2691                 }
2692         }
2693
2694 nla_put_failure:
2695         cb->args[0] = idx;
2696         ip_vs_service_put(svc);
2697
2698 out_err:
2699         mutex_unlock(&__ip_vs_mutex);
2700
2701         return skb->len;
2702 }
2703
2704 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user *udest,
2705                                  struct nlattr *nla, int full_entry)
2706 {
2707         struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2708         struct nlattr *nla_addr, *nla_port;
2709
2710         /* Parse mandatory identifying destination fields first */
2711         if (nla == NULL ||
2712             nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2713                 return -EINVAL;
2714
2715         nla_addr        = attrs[IPVS_DEST_ATTR_ADDR];
2716         nla_port        = attrs[IPVS_DEST_ATTR_PORT];
2717
2718         if (!(nla_addr && nla_port))
2719                 return -EINVAL;
2720
2721         nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2722         udest->port = nla_get_u16(nla_port);
2723
2724         /* If a full entry was requested, check for the additional fields */
2725         if (full_entry) {
2726                 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2727                               *nla_l_thresh;
2728
2729                 nla_fwd         = attrs[IPVS_DEST_ATTR_FWD_METHOD];
2730                 nla_weight      = attrs[IPVS_DEST_ATTR_WEIGHT];
2731                 nla_u_thresh    = attrs[IPVS_DEST_ATTR_U_THRESH];
2732                 nla_l_thresh    = attrs[IPVS_DEST_ATTR_L_THRESH];
2733
2734                 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
2735                         return -EINVAL;
2736
2737                 udest->conn_flags = nla_get_u32(nla_fwd)
2738                                     & IP_VS_CONN_F_FWD_MASK;
2739                 udest->weight = nla_get_u32(nla_weight);
2740                 udest->u_threshold = nla_get_u32(nla_u_thresh);
2741                 udest->l_threshold = nla_get_u32(nla_l_thresh);
2742         }
2743
2744         return 0;
2745 }
2746
2747 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
2748                                   const char *mcast_ifn, __be32 syncid)
2749 {
2750         struct nlattr *nl_daemon;
2751
2752         nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
2753         if (!nl_daemon)
2754                 return -EMSGSIZE;
2755
2756         NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
2757         NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
2758         NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
2759
2760         nla_nest_end(skb, nl_daemon);
2761
2762         return 0;
2763
2764 nla_put_failure:
2765         nla_nest_cancel(skb, nl_daemon);
2766         return -EMSGSIZE;
2767 }
2768
2769 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
2770                                   const char *mcast_ifn, __be32 syncid,
2771                                   struct netlink_callback *cb)
2772 {
2773         void *hdr;
2774         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2775                           &ip_vs_genl_family, NLM_F_MULTI,
2776                           IPVS_CMD_NEW_DAEMON);
2777         if (!hdr)
2778                 return -EMSGSIZE;
2779
2780         if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
2781                 goto nla_put_failure;
2782
2783         return genlmsg_end(skb, hdr);
2784
2785 nla_put_failure:
2786         genlmsg_cancel(skb, hdr);
2787         return -EMSGSIZE;
2788 }
2789
2790 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
2791                                    struct netlink_callback *cb)
2792 {
2793         mutex_lock(&__ip_vs_mutex);
2794         if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
2795                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
2796                                            ip_vs_master_mcast_ifn,
2797                                            ip_vs_master_syncid, cb) < 0)
2798                         goto nla_put_failure;
2799
2800                 cb->args[0] = 1;
2801         }
2802
2803         if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
2804                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
2805                                            ip_vs_backup_mcast_ifn,
2806                                            ip_vs_backup_syncid, cb) < 0)
2807                         goto nla_put_failure;
2808
2809                 cb->args[1] = 1;
2810         }
2811
2812 nla_put_failure:
2813         mutex_unlock(&__ip_vs_mutex);
2814
2815         return skb->len;
2816 }
2817
2818 static int ip_vs_genl_new_daemon(struct nlattr **attrs)
2819 {
2820         if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
2821               attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
2822               attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
2823                 return -EINVAL;
2824
2825         return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
2826                                  nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
2827                                  nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
2828 }
2829
2830 static int ip_vs_genl_del_daemon(struct nlattr **attrs)
2831 {
2832         if (!attrs[IPVS_DAEMON_ATTR_STATE])
2833                 return -EINVAL;
2834
2835         return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
2836 }
2837
2838 static int ip_vs_genl_set_config(struct nlattr **attrs)
2839 {
2840         struct ip_vs_timeout_user t;
2841
2842         __ip_vs_get_timeouts(&t);
2843
2844         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
2845                 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
2846
2847         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
2848                 t.tcp_fin_timeout =
2849                         nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
2850
2851         if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
2852                 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
2853
2854         return ip_vs_set_timeout(&t);
2855 }
2856
2857 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
2858 {
2859         struct ip_vs_service *svc = NULL;
2860         struct ip_vs_service_user usvc;
2861         struct ip_vs_dest_user udest;
2862         int ret = 0, cmd;
2863         int need_full_svc = 0, need_full_dest = 0;
2864
2865         cmd = info->genlhdr->cmd;
2866
2867         mutex_lock(&__ip_vs_mutex);
2868
2869         if (cmd == IPVS_CMD_FLUSH) {
2870                 ret = ip_vs_flush();
2871                 goto out;
2872         } else if (cmd == IPVS_CMD_SET_CONFIG) {
2873                 ret = ip_vs_genl_set_config(info->attrs);
2874                 goto out;
2875         } else if (cmd == IPVS_CMD_NEW_DAEMON ||
2876                    cmd == IPVS_CMD_DEL_DAEMON) {
2877
2878                 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
2879
2880                 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
2881                     nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
2882                                      info->attrs[IPVS_CMD_ATTR_DAEMON],
2883                                      ip_vs_daemon_policy)) {
2884                         ret = -EINVAL;
2885                         goto out;
2886                 }
2887
2888                 if (cmd == IPVS_CMD_NEW_DAEMON)
2889                         ret = ip_vs_genl_new_daemon(daemon_attrs);
2890                 else
2891                         ret = ip_vs_genl_del_daemon(daemon_attrs);
2892                 goto out;
2893         } else if (cmd == IPVS_CMD_ZERO &&
2894                    !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
2895                 ret = ip_vs_zero_all();
2896                 goto out;
2897         }
2898
2899         /* All following commands require a service argument, so check if we
2900          * received a valid one. We need a full service specification when
2901          * adding / editing a service. Only identifying members otherwise. */
2902         if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
2903                 need_full_svc = 1;
2904
2905         ret = ip_vs_genl_parse_service(&usvc,
2906                                        info->attrs[IPVS_CMD_ATTR_SERVICE],
2907                                        need_full_svc);
2908         if (ret)
2909                 goto out;
2910
2911         /* Lookup the exact service by <protocol, addr, port> or fwmark */
2912         if (usvc.fwmark == 0)
2913                 svc = __ip_vs_service_get(usvc.protocol, usvc.addr, usvc.port);
2914         else
2915                 svc = __ip_vs_svc_fwm_get(usvc.fwmark);
2916
2917         /* Unless we're adding a new service, the service must already exist */
2918         if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
2919                 ret = -ESRCH;
2920                 goto out;
2921         }
2922
2923         /* Destination commands require a valid destination argument. For
2924          * adding / editing a destination, we need a full destination
2925          * specification. */
2926         if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
2927             cmd == IPVS_CMD_DEL_DEST) {
2928                 if (cmd != IPVS_CMD_DEL_DEST)
2929                         need_full_dest = 1;
2930
2931                 ret = ip_vs_genl_parse_dest(&udest,
2932                                             info->attrs[IPVS_CMD_ATTR_DEST],
2933                                             need_full_dest);
2934                 if (ret)
2935                         goto out;
2936         }
2937
2938         switch (cmd) {
2939         case IPVS_CMD_NEW_SERVICE:
2940                 if (svc == NULL)
2941                         ret = ip_vs_add_service(&usvc, &svc);
2942                 else
2943                         ret = -EEXIST;
2944                 break;
2945         case IPVS_CMD_SET_SERVICE:
2946                 ret = ip_vs_edit_service(svc, &usvc);
2947                 break;
2948         case IPVS_CMD_DEL_SERVICE:
2949                 ret = ip_vs_del_service(svc);
2950                 break;
2951         case IPVS_CMD_NEW_DEST:
2952                 ret = ip_vs_add_dest(svc, &udest);
2953                 break;
2954         case IPVS_CMD_SET_DEST:
2955                 ret = ip_vs_edit_dest(svc, &udest);
2956                 break;
2957         case IPVS_CMD_DEL_DEST:
2958                 ret = ip_vs_del_dest(svc, &udest);
2959                 break;
2960         case IPVS_CMD_ZERO:
2961                 ret = ip_vs_zero_service(svc);
2962                 break;
2963         default:
2964                 ret = -EINVAL;
2965         }
2966
2967 out:
2968         if (svc)
2969                 ip_vs_service_put(svc);
2970         mutex_unlock(&__ip_vs_mutex);
2971
2972         return ret;
2973 }
2974
2975 static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
2976 {
2977         struct sk_buff *msg;
2978         void *reply;
2979         int ret, cmd, reply_cmd;
2980
2981         cmd = info->genlhdr->cmd;
2982
2983         if (cmd == IPVS_CMD_GET_SERVICE)
2984                 reply_cmd = IPVS_CMD_NEW_SERVICE;
2985         else if (cmd == IPVS_CMD_GET_INFO)
2986                 reply_cmd = IPVS_CMD_SET_INFO;
2987         else if (cmd == IPVS_CMD_GET_CONFIG)
2988                 reply_cmd = IPVS_CMD_SET_CONFIG;
2989         else {
2990                 IP_VS_ERR("unknown Generic Netlink command\n");
2991                 return -EINVAL;
2992         }
2993
2994         msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2995         if (!msg)
2996                 return -ENOMEM;
2997
2998         mutex_lock(&__ip_vs_mutex);
2999
3000         reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3001         if (reply == NULL)
3002                 goto nla_put_failure;
3003
3004         switch (cmd) {
3005         case IPVS_CMD_GET_SERVICE:
3006         {
3007                 struct ip_vs_service *svc;
3008
3009                 svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
3010                 if (IS_ERR(svc)) {
3011                         ret = PTR_ERR(svc);
3012                         goto out_err;
3013                 } else if (svc) {
3014                         ret = ip_vs_genl_fill_service(msg, svc);
3015                         ip_vs_service_put(svc);
3016                         if (ret)
3017                                 goto nla_put_failure;
3018                 } else {
3019                         ret = -ESRCH;
3020                         goto out_err;
3021                 }
3022
3023                 break;
3024         }
3025
3026         case IPVS_CMD_GET_CONFIG:
3027         {
3028                 struct ip_vs_timeout_user t;
3029
3030                 __ip_vs_get_timeouts(&t);
3031 #ifdef CONFIG_IP_VS_PROTO_TCP
3032                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3033                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3034                             t.tcp_fin_timeout);
3035 #endif
3036 #ifdef CONFIG_IP_VS_PROTO_UDP
3037                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3038 #endif
3039
3040                 break;
3041         }
3042
3043         case IPVS_CMD_GET_INFO:
3044                 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3045                 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3046                             IP_VS_CONN_TAB_SIZE);
3047                 break;
3048         }
3049
3050         genlmsg_end(msg, reply);
3051         ret = genlmsg_unicast(msg, info->snd_pid);
3052         goto out;
3053
3054 nla_put_failure:
3055         IP_VS_ERR("not enough space in Netlink message\n");
3056         ret = -EMSGSIZE;
3057
3058 out_err:
3059         nlmsg_free(msg);
3060 out:
3061         mutex_unlock(&__ip_vs_mutex);
3062
3063         return ret;
3064 }
3065
3066
3067 static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3068         {
3069                 .cmd    = IPVS_CMD_NEW_SERVICE,
3070                 .flags  = GENL_ADMIN_PERM,
3071                 .policy = ip_vs_cmd_policy,
3072                 .doit   = ip_vs_genl_set_cmd,
3073         },
3074         {
3075                 .cmd    = IPVS_CMD_SET_SERVICE,
3076                 .flags  = GENL_ADMIN_PERM,
3077                 .policy = ip_vs_cmd_policy,
3078                 .doit   = ip_vs_genl_set_cmd,
3079         },
3080         {
3081                 .cmd    = IPVS_CMD_DEL_SERVICE,
3082                 .flags  = GENL_ADMIN_PERM,
3083                 .policy = ip_vs_cmd_policy,
3084                 .doit   = ip_vs_genl_set_cmd,
3085         },
3086         {
3087                 .cmd    = IPVS_CMD_GET_SERVICE,
3088                 .flags  = GENL_ADMIN_PERM,
3089                 .doit   = ip_vs_genl_get_cmd,
3090                 .dumpit = ip_vs_genl_dump_services,
3091                 .policy = ip_vs_cmd_policy,
3092         },
3093         {
3094                 .cmd    = IPVS_CMD_NEW_DEST,
3095                 .flags  = GENL_ADMIN_PERM,
3096                 .policy = ip_vs_cmd_policy,
3097                 .doit   = ip_vs_genl_set_cmd,
3098         },
3099         {
3100                 .cmd    = IPVS_CMD_SET_DEST,
3101                 .flags  = GENL_ADMIN_PERM,
3102                 .policy = ip_vs_cmd_policy,
3103                 .doit   = ip_vs_genl_set_cmd,
3104         },
3105         {
3106                 .cmd    = IPVS_CMD_DEL_DEST,
3107                 .flags  = GENL_ADMIN_PERM,
3108                 .policy = ip_vs_cmd_policy,
3109                 .doit   = ip_vs_genl_set_cmd,
3110         },
3111         {
3112                 .cmd    = IPVS_CMD_GET_DEST,
3113                 .flags  = GENL_ADMIN_PERM,
3114                 .policy = ip_vs_cmd_policy,
3115                 .dumpit = ip_vs_genl_dump_dests,
3116         },
3117         {
3118                 .cmd    = IPVS_CMD_NEW_DAEMON,
3119                 .flags  = GENL_ADMIN_PERM,
3120                 .policy = ip_vs_cmd_policy,
3121                 .doit   = ip_vs_genl_set_cmd,
3122         },
3123         {
3124                 .cmd    = IPVS_CMD_DEL_DAEMON,
3125                 .flags  = GENL_ADMIN_PERM,
3126                 .policy = ip_vs_cmd_policy,
3127                 .doit   = ip_vs_genl_set_cmd,
3128         },
3129         {
3130                 .cmd    = IPVS_CMD_GET_DAEMON,
3131                 .flags  = GENL_ADMIN_PERM,
3132                 .dumpit = ip_vs_genl_dump_daemons,
3133         },
3134         {
3135                 .cmd    = IPVS_CMD_SET_CONFIG,
3136                 .flags  = GENL_ADMIN_PERM,
3137                 .policy = ip_vs_cmd_policy,
3138                 .doit   = ip_vs_genl_set_cmd,
3139         },
3140         {
3141                 .cmd    = IPVS_CMD_GET_CONFIG,
3142                 .flags  = GENL_ADMIN_PERM,
3143                 .doit   = ip_vs_genl_get_cmd,
3144         },
3145         {
3146                 .cmd    = IPVS_CMD_GET_INFO,
3147                 .flags  = GENL_ADMIN_PERM,
3148                 .doit   = ip_vs_genl_get_cmd,
3149         },
3150         {
3151                 .cmd    = IPVS_CMD_ZERO,
3152                 .flags  = GENL_ADMIN_PERM,
3153                 .policy = ip_vs_cmd_policy,
3154                 .doit   = ip_vs_genl_set_cmd,
3155         },
3156         {
3157                 .cmd    = IPVS_CMD_FLUSH,
3158                 .flags  = GENL_ADMIN_PERM,
3159                 .doit   = ip_vs_genl_set_cmd,
3160         },
3161 };
3162
3163 static int __init ip_vs_genl_register(void)
3164 {
3165         int ret, i;
3166
3167         ret = genl_register_family(&ip_vs_genl_family);
3168         if (ret)
3169                 return ret;
3170
3171         for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
3172                 ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
3173                 if (ret)
3174                         goto err_out;
3175         }
3176         return 0;
3177
3178 err_out:
3179         genl_unregister_family(&ip_vs_genl_family);
3180         return ret;
3181 }
3182
3183 static void ip_vs_genl_unregister(void)
3184 {
3185         genl_unregister_family(&ip_vs_genl_family);
3186 }
3187
3188 /* End of Generic Netlink interface definitions */
3189
3190
3191 int __init ip_vs_control_init(void)
3192 {
3193         int ret;
3194         int idx;
3195
3196         EnterFunction(2);
3197
3198         ret = nf_register_sockopt(&ip_vs_sockopts);
3199         if (ret) {
3200                 IP_VS_ERR("cannot register sockopt.\n");
3201                 return ret;
3202         }
3203
3204         ret = ip_vs_genl_register();
3205         if (ret) {
3206                 IP_VS_ERR("cannot register Generic Netlink interface.\n");
3207                 nf_unregister_sockopt(&ip_vs_sockopts);
3208                 return ret;
3209         }
3210
3211         proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
3212         proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
3213
3214         sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
3215
3216         /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3217         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
3218                 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3219                 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3220         }
3221         for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++)  {
3222                 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3223         }
3224
3225         ip_vs_new_estimator(&ip_vs_stats);
3226
3227         /* Hook the defense timer */
3228         schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3229
3230         LeaveFunction(2);
3231         return 0;
3232 }
3233
3234
3235 void ip_vs_control_cleanup(void)
3236 {
3237         EnterFunction(2);
3238         ip_vs_trash_cleanup();
3239         cancel_rearming_delayed_work(&defense_work);
3240         cancel_work_sync(&defense_work.work);
3241         ip_vs_kill_estimator(&ip_vs_stats);
3242         unregister_sysctl_table(sysctl_header);
3243         proc_net_remove(&init_net, "ip_vs_stats");
3244         proc_net_remove(&init_net, "ip_vs");
3245         ip_vs_genl_unregister();
3246         nf_unregister_sockopt(&ip_vs_sockopts);
3247         LeaveFunction(2);
3248 }