]> bbs.cooldavid.org Git - net-next-2.6.git/blob - net/ipv4/ipvs/ip_vs_ctl.c
IPVS: Change IPVS data structures to support IPv6 addresses
[net-next-2.6.git] / net / ipv4 / ipvs / ip_vs_ctl.c
1 /*
2  * IPVS         An implementation of the IP virtual server support for the
3  *              LINUX operating system.  IPVS is now implemented as a module
4  *              over the NetFilter framework. IPVS can be used to build a
5  *              high-performance and highly available server based on a
6  *              cluster of servers.
7  *
8  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
9  *              Peter Kese <peter.kese@ijs.si>
10  *              Julian Anastasov <ja@ssi.bg>
11  *
12  *              This program is free software; you can redistribute it and/or
13  *              modify it under the terms of the GNU General Public License
14  *              as published by the Free Software Foundation; either version
15  *              2 of the License, or (at your option) any later version.
16  *
17  * Changes:
18  *
19  */
20
21 #include <linux/module.h>
22 #include <linux/init.h>
23 #include <linux/types.h>
24 #include <linux/capability.h>
25 #include <linux/fs.h>
26 #include <linux/sysctl.h>
27 #include <linux/proc_fs.h>
28 #include <linux/workqueue.h>
29 #include <linux/swap.h>
30 #include <linux/seq_file.h>
31
32 #include <linux/netfilter.h>
33 #include <linux/netfilter_ipv4.h>
34 #include <linux/mutex.h>
35
36 #include <net/net_namespace.h>
37 #include <net/ip.h>
38 #include <net/route.h>
39 #include <net/sock.h>
40 #include <net/genetlink.h>
41
42 #include <asm/uaccess.h>
43
44 #include <net/ip_vs.h>
45
46 /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
47 static DEFINE_MUTEX(__ip_vs_mutex);
48
49 /* lock for service table */
50 static DEFINE_RWLOCK(__ip_vs_svc_lock);
51
52 /* lock for table with the real services */
53 static DEFINE_RWLOCK(__ip_vs_rs_lock);
54
55 /* lock for state and timeout tables */
56 static DEFINE_RWLOCK(__ip_vs_securetcp_lock);
57
58 /* lock for drop entry handling */
59 static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
60
61 /* lock for drop packet handling */
62 static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
63
64 /* 1/rate drop and drop-entry variables */
65 int ip_vs_drop_rate = 0;
66 int ip_vs_drop_counter = 0;
67 static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
68
69 /* number of virtual services */
70 static int ip_vs_num_services = 0;
71
72 /* sysctl variables */
73 static int sysctl_ip_vs_drop_entry = 0;
74 static int sysctl_ip_vs_drop_packet = 0;
75 static int sysctl_ip_vs_secure_tcp = 0;
76 static int sysctl_ip_vs_amemthresh = 1024;
77 static int sysctl_ip_vs_am_droprate = 10;
78 int sysctl_ip_vs_cache_bypass = 0;
79 int sysctl_ip_vs_expire_nodest_conn = 0;
80 int sysctl_ip_vs_expire_quiescent_template = 0;
81 int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
82 int sysctl_ip_vs_nat_icmp_send = 0;
83
84
#ifdef CONFIG_IP_VS_DEBUG
/* Debug verbosity; only present when CONFIG_IP_VS_DEBUG is set. */
static int sysctl_ip_vs_debug_level;

/*
 *	Returns the current value of sysctl_ip_vs_debug_level.
 */
int ip_vs_get_debug_level(void)
{
	int level = sysctl_ip_vs_debug_level;

	return level;
}
#endif
93
94 /*
95  *      update_defense_level is called from keventd and from sysctl,
96  *      so it needs to protect itself from softirqs
97  */
98 static void update_defense_level(void)
99 {
100         struct sysinfo i;
101         static int old_secure_tcp = 0;
102         int availmem;
103         int nomem;
104         int to_change = -1;
105
106         /* we only count free and buffered memory (in pages) */
107         si_meminfo(&i);
108         availmem = i.freeram + i.bufferram;
109         /* however in linux 2.5 the i.bufferram is total page cache size,
110            we need adjust it */
111         /* si_swapinfo(&i); */
112         /* availmem = availmem - (i.totalswap - i.freeswap); */
113
114         nomem = (availmem < sysctl_ip_vs_amemthresh);
115
116         local_bh_disable();
117
118         /* drop_entry */
119         spin_lock(&__ip_vs_dropentry_lock);
120         switch (sysctl_ip_vs_drop_entry) {
121         case 0:
122                 atomic_set(&ip_vs_dropentry, 0);
123                 break;
124         case 1:
125                 if (nomem) {
126                         atomic_set(&ip_vs_dropentry, 1);
127                         sysctl_ip_vs_drop_entry = 2;
128                 } else {
129                         atomic_set(&ip_vs_dropentry, 0);
130                 }
131                 break;
132         case 2:
133                 if (nomem) {
134                         atomic_set(&ip_vs_dropentry, 1);
135                 } else {
136                         atomic_set(&ip_vs_dropentry, 0);
137                         sysctl_ip_vs_drop_entry = 1;
138                 };
139                 break;
140         case 3:
141                 atomic_set(&ip_vs_dropentry, 1);
142                 break;
143         }
144         spin_unlock(&__ip_vs_dropentry_lock);
145
146         /* drop_packet */
147         spin_lock(&__ip_vs_droppacket_lock);
148         switch (sysctl_ip_vs_drop_packet) {
149         case 0:
150                 ip_vs_drop_rate = 0;
151                 break;
152         case 1:
153                 if (nomem) {
154                         ip_vs_drop_rate = ip_vs_drop_counter
155                                 = sysctl_ip_vs_amemthresh /
156                                 (sysctl_ip_vs_amemthresh-availmem);
157                         sysctl_ip_vs_drop_packet = 2;
158                 } else {
159                         ip_vs_drop_rate = 0;
160                 }
161                 break;
162         case 2:
163                 if (nomem) {
164                         ip_vs_drop_rate = ip_vs_drop_counter
165                                 = sysctl_ip_vs_amemthresh /
166                                 (sysctl_ip_vs_amemthresh-availmem);
167                 } else {
168                         ip_vs_drop_rate = 0;
169                         sysctl_ip_vs_drop_packet = 1;
170                 }
171                 break;
172         case 3:
173                 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
174                 break;
175         }
176         spin_unlock(&__ip_vs_droppacket_lock);
177
178         /* secure_tcp */
179         write_lock(&__ip_vs_securetcp_lock);
180         switch (sysctl_ip_vs_secure_tcp) {
181         case 0:
182                 if (old_secure_tcp >= 2)
183                         to_change = 0;
184                 break;
185         case 1:
186                 if (nomem) {
187                         if (old_secure_tcp < 2)
188                                 to_change = 1;
189                         sysctl_ip_vs_secure_tcp = 2;
190                 } else {
191                         if (old_secure_tcp >= 2)
192                                 to_change = 0;
193                 }
194                 break;
195         case 2:
196                 if (nomem) {
197                         if (old_secure_tcp < 2)
198                                 to_change = 1;
199                 } else {
200                         if (old_secure_tcp >= 2)
201                                 to_change = 0;
202                         sysctl_ip_vs_secure_tcp = 1;
203                 }
204                 break;
205         case 3:
206                 if (old_secure_tcp < 2)
207                         to_change = 1;
208                 break;
209         }
210         old_secure_tcp = sysctl_ip_vs_secure_tcp;
211         if (to_change >= 0)
212                 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
213         write_unlock(&__ip_vs_securetcp_lock);
214
215         local_bh_enable();
216 }
217
218
219 /*
220  *      Timer for checking the defense
221  */
222 #define DEFENSE_TIMER_PERIOD    1*HZ
223 static void defense_work_handler(struct work_struct *work);
224 static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
225
226 static void defense_work_handler(struct work_struct *work)
227 {
228         update_defense_level();
229         if (atomic_read(&ip_vs_dropentry))
230                 ip_vs_random_dropentry();
231
232         schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
233 }
234
235 int
236 ip_vs_use_count_inc(void)
237 {
238         return try_module_get(THIS_MODULE);
239 }
240
241 void
242 ip_vs_use_count_dec(void)
243 {
244         module_put(THIS_MODULE);
245 }
246
247
248 /*
249  *      Hash table: for virtual service lookups
250  */
251 #define IP_VS_SVC_TAB_BITS 8
252 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
253 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
254
255 /* the service table hashed by <protocol, addr, port> */
256 static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
257 /* the service table hashed by fwmark */
258 static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
259
260 /*
261  *      Hash table: for real service lookups
262  */
263 #define IP_VS_RTAB_BITS 4
264 #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
265 #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
266
267 static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
268
269 /*
270  *      Trash for destinations
271  */
272 static LIST_HEAD(ip_vs_dest_trash);
273
274 /*
275  *      FTP & NULL virtual service counters
276  */
277 static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
278 static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
279
280
281 /*
282  *      Returns hash value for virtual service
283  */
284 static __inline__ unsigned
285 ip_vs_svc_hashkey(unsigned proto, __be32 addr, __be16 port)
286 {
287         register unsigned porth = ntohs(port);
288
289         return (proto^ntohl(addr)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
290                 & IP_VS_SVC_TAB_MASK;
291 }
292
293 /*
294  *      Returns hash value of fwmark for virtual service lookup
295  */
296 static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
297 {
298         return fwmark & IP_VS_SVC_TAB_MASK;
299 }
300
301 /*
302  *      Hashes a service in the ip_vs_svc_table by <proto,addr,port>
303  *      or in the ip_vs_svc_fwm_table by fwmark.
304  *      Should be called with locked tables.
305  */
306 static int ip_vs_svc_hash(struct ip_vs_service *svc)
307 {
308         unsigned hash;
309
310         if (svc->flags & IP_VS_SVC_F_HASHED) {
311                 IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
312                           "called from %p\n", __builtin_return_address(0));
313                 return 0;
314         }
315
316         if (svc->fwmark == 0) {
317                 /*
318                  *  Hash it by <protocol,addr,port> in ip_vs_svc_table
319                  */
320                 hash = ip_vs_svc_hashkey(svc->protocol, svc->addr.ip,
321                                          svc->port);
322                 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
323         } else {
324                 /*
325                  *  Hash it by fwmark in ip_vs_svc_fwm_table
326                  */
327                 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
328                 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
329         }
330
331         svc->flags |= IP_VS_SVC_F_HASHED;
332         /* increase its refcnt because it is referenced by the svc table */
333         atomic_inc(&svc->refcnt);
334         return 1;
335 }
336
337
338 /*
339  *      Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
340  *      Should be called with locked tables.
341  */
342 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
343 {
344         if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
345                 IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
346                           "called from %p\n", __builtin_return_address(0));
347                 return 0;
348         }
349
350         if (svc->fwmark == 0) {
351                 /* Remove it from the ip_vs_svc_table table */
352                 list_del(&svc->s_list);
353         } else {
354                 /* Remove it from the ip_vs_svc_fwm_table table */
355                 list_del(&svc->f_list);
356         }
357
358         svc->flags &= ~IP_VS_SVC_F_HASHED;
359         atomic_dec(&svc->refcnt);
360         return 1;
361 }
362
363
364 /*
365  *      Get service by {proto,addr,port} in the service table.
366  */
367 static __inline__ struct ip_vs_service *
368 __ip_vs_service_get(__u16 protocol, __be32 vaddr, __be16 vport)
369 {
370         unsigned hash;
371         struct ip_vs_service *svc;
372
373         /* Check for "full" addressed entries */
374         hash = ip_vs_svc_hashkey(protocol, vaddr, vport);
375
376         list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
377                 if ((svc->addr.ip == vaddr)
378                     && (svc->port == vport)
379                     && (svc->protocol == protocol)) {
380                         /* HIT */
381                         atomic_inc(&svc->usecnt);
382                         return svc;
383                 }
384         }
385
386         return NULL;
387 }
388
389
390 /*
391  *      Get service by {fwmark} in the service table.
392  */
393 static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark)
394 {
395         unsigned hash;
396         struct ip_vs_service *svc;
397
398         /* Check for fwmark addressed entries */
399         hash = ip_vs_svc_fwm_hashkey(fwmark);
400
401         list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
402                 if (svc->fwmark == fwmark) {
403                         /* HIT */
404                         atomic_inc(&svc->usecnt);
405                         return svc;
406                 }
407         }
408
409         return NULL;
410 }
411
412 struct ip_vs_service *
413 ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport)
414 {
415         struct ip_vs_service *svc;
416
417         read_lock(&__ip_vs_svc_lock);
418
419         /*
420          *      Check the table hashed by fwmark first
421          */
422         if (fwmark && (svc = __ip_vs_svc_fwm_get(fwmark)))
423                 goto out;
424
425         /*
426          *      Check the table hashed by <protocol,addr,port>
427          *      for "full" addressed entries
428          */
429         svc = __ip_vs_service_get(protocol, vaddr, vport);
430
431         if (svc == NULL
432             && protocol == IPPROTO_TCP
433             && atomic_read(&ip_vs_ftpsvc_counter)
434             && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
435                 /*
436                  * Check if ftp service entry exists, the packet
437                  * might belong to FTP data connections.
438                  */
439                 svc = __ip_vs_service_get(protocol, vaddr, FTPPORT);
440         }
441
442         if (svc == NULL
443             && atomic_read(&ip_vs_nullsvc_counter)) {
444                 /*
445                  * Check if the catch-all port (port zero) exists
446                  */
447                 svc = __ip_vs_service_get(protocol, vaddr, 0);
448         }
449
450   out:
451         read_unlock(&__ip_vs_svc_lock);
452
453         IP_VS_DBG(9, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n",
454                   fwmark, ip_vs_proto_name(protocol),
455                   NIPQUAD(vaddr), ntohs(vport),
456                   svc?"hit":"not hit");
457
458         return svc;
459 }
460
461
462 static inline void
463 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
464 {
465         atomic_inc(&svc->refcnt);
466         dest->svc = svc;
467 }
468
469 static inline void
470 __ip_vs_unbind_svc(struct ip_vs_dest *dest)
471 {
472         struct ip_vs_service *svc = dest->svc;
473
474         dest->svc = NULL;
475         if (atomic_dec_and_test(&svc->refcnt))
476                 kfree(svc);
477 }
478
479
480 /*
481  *      Returns hash value for real service
482  */
483 static __inline__ unsigned ip_vs_rs_hashkey(__be32 addr, __be16 port)
484 {
485         register unsigned porth = ntohs(port);
486
487         return (ntohl(addr)^(porth>>IP_VS_RTAB_BITS)^porth)
488                 & IP_VS_RTAB_MASK;
489 }
490
491 /*
492  *      Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
493  *      should be called with locked tables.
494  */
495 static int ip_vs_rs_hash(struct ip_vs_dest *dest)
496 {
497         unsigned hash;
498
499         if (!list_empty(&dest->d_list)) {
500                 return 0;
501         }
502
503         /*
504          *      Hash by proto,addr,port,
505          *      which are the parameters of the real service.
506          */
507         hash = ip_vs_rs_hashkey(dest->addr.ip, dest->port);
508         list_add(&dest->d_list, &ip_vs_rtable[hash]);
509
510         return 1;
511 }
512
513 /*
514  *      UNhashes ip_vs_dest from ip_vs_rtable.
515  *      should be called with locked tables.
516  */
517 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
518 {
519         /*
520          * Remove it from the ip_vs_rtable table.
521          */
522         if (!list_empty(&dest->d_list)) {
523                 list_del(&dest->d_list);
524                 INIT_LIST_HEAD(&dest->d_list);
525         }
526
527         return 1;
528 }
529
530 /*
531  *      Lookup real service by <proto,addr,port> in the real service table.
532  */
533 struct ip_vs_dest *
534 ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport)
535 {
536         unsigned hash;
537         struct ip_vs_dest *dest;
538
539         /*
540          *      Check for "full" addressed entries
541          *      Return the first found entry
542          */
543         hash = ip_vs_rs_hashkey(daddr, dport);
544
545         read_lock(&__ip_vs_rs_lock);
546         list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
547                 if ((dest->addr.ip == daddr)
548                     && (dest->port == dport)
549                     && ((dest->protocol == protocol) ||
550                         dest->vfwmark)) {
551                         /* HIT */
552                         read_unlock(&__ip_vs_rs_lock);
553                         return dest;
554                 }
555         }
556         read_unlock(&__ip_vs_rs_lock);
557
558         return NULL;
559 }
560
561 /*
562  *      Lookup destination by {addr,port} in the given service
563  */
564 static struct ip_vs_dest *
565 ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
566 {
567         struct ip_vs_dest *dest;
568
569         /*
570          * Find the destination for the given service
571          */
572         list_for_each_entry(dest, &svc->destinations, n_list) {
573                 if ((dest->addr.ip == daddr) && (dest->port == dport)) {
574                         /* HIT */
575                         return dest;
576                 }
577         }
578
579         return NULL;
580 }
581
582 /*
583  * Find destination by {daddr,dport,vaddr,protocol}
584  * Cretaed to be used in ip_vs_process_message() in
585  * the backup synchronization daemon. It finds the
586  * destination to be bound to the received connection
587  * on the backup.
588  *
589  * ip_vs_lookup_real_service() looked promissing, but
590  * seems not working as expected.
591  */
592 struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport,
593                                     __be32 vaddr, __be16 vport, __u16 protocol)
594 {
595         struct ip_vs_dest *dest;
596         struct ip_vs_service *svc;
597
598         svc = ip_vs_service_get(0, protocol, vaddr, vport);
599         if (!svc)
600                 return NULL;
601         dest = ip_vs_lookup_dest(svc, daddr, dport);
602         if (dest)
603                 atomic_inc(&dest->refcnt);
604         ip_vs_service_put(svc);
605         return dest;
606 }
607
608 /*
609  *  Lookup dest by {svc,addr,port} in the destination trash.
610  *  The destination trash is used to hold the destinations that are removed
611  *  from the service table but are still referenced by some conn entries.
612  *  The reason to add the destination trash is when the dest is temporary
613  *  down (either by administrator or by monitor program), the dest can be
614  *  picked back from the trash, the remaining connections to the dest can
615  *  continue, and the counting information of the dest is also useful for
616  *  scheduling.
617  */
618 static struct ip_vs_dest *
619 ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
620 {
621         struct ip_vs_dest *dest, *nxt;
622
623         /*
624          * Find the destination in trash
625          */
626         list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
627                 IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, "
628                           "dest->refcnt=%d\n",
629                           dest->vfwmark,
630                           NIPQUAD(dest->addr.ip), ntohs(dest->port),
631                           atomic_read(&dest->refcnt));
632                 if (dest->addr.ip == daddr &&
633                     dest->port == dport &&
634                     dest->vfwmark == svc->fwmark &&
635                     dest->protocol == svc->protocol &&
636                     (svc->fwmark ||
637                      (dest->vaddr.ip == svc->addr.ip &&
638                       dest->vport == svc->port))) {
639                         /* HIT */
640                         return dest;
641                 }
642
643                 /*
644                  * Try to purge the destination from trash if not referenced
645                  */
646                 if (atomic_read(&dest->refcnt) == 1) {
647                         IP_VS_DBG(3, "Removing destination %u/%u.%u.%u.%u:%u "
648                                   "from trash\n",
649                                   dest->vfwmark,
650                                   NIPQUAD(dest->addr.ip), ntohs(dest->port));
651                         list_del(&dest->n_list);
652                         ip_vs_dst_reset(dest);
653                         __ip_vs_unbind_svc(dest);
654                         kfree(dest);
655                 }
656         }
657
658         return NULL;
659 }
660
661
662 /*
663  *  Clean up all the destinations in the trash
664  *  Called by the ip_vs_control_cleanup()
665  *
666  *  When the ip_vs_control_clearup is activated by ipvs module exit,
667  *  the service tables must have been flushed and all the connections
668  *  are expired, and the refcnt of each destination in the trash must
669  *  be 1, so we simply release them here.
670  */
671 static void ip_vs_trash_cleanup(void)
672 {
673         struct ip_vs_dest *dest, *nxt;
674
675         list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
676                 list_del(&dest->n_list);
677                 ip_vs_dst_reset(dest);
678                 __ip_vs_unbind_svc(dest);
679                 kfree(dest);
680         }
681 }
682
683
684 static void
685 ip_vs_zero_stats(struct ip_vs_stats *stats)
686 {
687         spin_lock_bh(&stats->lock);
688
689         stats->conns = 0;
690         stats->inpkts = 0;
691         stats->outpkts = 0;
692         stats->inbytes = 0;
693         stats->outbytes = 0;
694
695         stats->cps = 0;
696         stats->inpps = 0;
697         stats->outpps = 0;
698         stats->inbps = 0;
699         stats->outbps = 0;
700
701         ip_vs_zero_estimator(stats);
702
703         spin_unlock_bh(&stats->lock);
704 }
705
706 /*
707  *      Update a destination in the given service
708  */
709 static void
710 __ip_vs_update_dest(struct ip_vs_service *svc,
711                     struct ip_vs_dest *dest, struct ip_vs_dest_user *udest)
712 {
713         int conn_flags;
714
715         /* set the weight and the flags */
716         atomic_set(&dest->weight, udest->weight);
717         conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
718
719         /* check if local node and update the flags */
720         if (inet_addr_type(&init_net, udest->addr) == RTN_LOCAL) {
721                 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
722                         | IP_VS_CONN_F_LOCALNODE;
723         }
724
725         /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
726         if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
727                 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
728         } else {
729                 /*
730                  *    Put the real service in ip_vs_rtable if not present.
731                  *    For now only for NAT!
732                  */
733                 write_lock_bh(&__ip_vs_rs_lock);
734                 ip_vs_rs_hash(dest);
735                 write_unlock_bh(&__ip_vs_rs_lock);
736         }
737         atomic_set(&dest->conn_flags, conn_flags);
738
739         /* bind the service */
740         if (!dest->svc) {
741                 __ip_vs_bind_svc(dest, svc);
742         } else {
743                 if (dest->svc != svc) {
744                         __ip_vs_unbind_svc(dest);
745                         ip_vs_zero_stats(&dest->stats);
746                         __ip_vs_bind_svc(dest, svc);
747                 }
748         }
749
750         /* set the dest status flags */
751         dest->flags |= IP_VS_DEST_F_AVAILABLE;
752
753         if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
754                 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
755         dest->u_threshold = udest->u_threshold;
756         dest->l_threshold = udest->l_threshold;
757 }
758
759
760 /*
761  *      Create a destination for the given service
762  */
763 static int
764 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest,
765                struct ip_vs_dest **dest_p)
766 {
767         struct ip_vs_dest *dest;
768         unsigned atype;
769
770         EnterFunction(2);
771
772         atype = inet_addr_type(&init_net, udest->addr);
773         if (atype != RTN_LOCAL && atype != RTN_UNICAST)
774                 return -EINVAL;
775
776         dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
777         if (dest == NULL) {
778                 IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
779                 return -ENOMEM;
780         }
781
782         dest->protocol = svc->protocol;
783         dest->vaddr.ip = svc->addr.ip;
784         dest->vport = svc->port;
785         dest->vfwmark = svc->fwmark;
786         dest->addr.ip = udest->addr;
787         dest->port = udest->port;
788
789         atomic_set(&dest->activeconns, 0);
790         atomic_set(&dest->inactconns, 0);
791         atomic_set(&dest->persistconns, 0);
792         atomic_set(&dest->refcnt, 0);
793
794         INIT_LIST_HEAD(&dest->d_list);
795         spin_lock_init(&dest->dst_lock);
796         spin_lock_init(&dest->stats.lock);
797         __ip_vs_update_dest(svc, dest, udest);
798         ip_vs_new_estimator(&dest->stats);
799
800         *dest_p = dest;
801
802         LeaveFunction(2);
803         return 0;
804 }
805
806
807 /*
808  *      Add a destination into an existing service
809  */
810 static int
811 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
812 {
813         struct ip_vs_dest *dest;
814         __be32 daddr = udest->addr;
815         __be16 dport = udest->port;
816         int ret;
817
818         EnterFunction(2);
819
820         if (udest->weight < 0) {
821                 IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
822                 return -ERANGE;
823         }
824
825         if (udest->l_threshold > udest->u_threshold) {
826                 IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than "
827                           "upper threshold\n");
828                 return -ERANGE;
829         }
830
831         /*
832          * Check if the dest already exists in the list
833          */
834         dest = ip_vs_lookup_dest(svc, daddr, dport);
835         if (dest != NULL) {
836                 IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
837                 return -EEXIST;
838         }
839
840         /*
841          * Check if the dest already exists in the trash and
842          * is from the same service
843          */
844         dest = ip_vs_trash_get_dest(svc, daddr, dport);
845         if (dest != NULL) {
846                 IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
847                           "dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
848                           NIPQUAD(daddr), ntohs(dport),
849                           atomic_read(&dest->refcnt),
850                           dest->vfwmark,
851                           NIPQUAD(dest->vaddr.ip),
852                           ntohs(dest->vport));
853                 __ip_vs_update_dest(svc, dest, udest);
854
855                 /*
856                  * Get the destination from the trash
857                  */
858                 list_del(&dest->n_list);
859
860                 ip_vs_new_estimator(&dest->stats);
861
862                 write_lock_bh(&__ip_vs_svc_lock);
863
864                 /*
865                  * Wait until all other svc users go away.
866                  */
867                 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
868
869                 list_add(&dest->n_list, &svc->destinations);
870                 svc->num_dests++;
871
872                 /* call the update_service function of its scheduler */
873                 if (svc->scheduler->update_service)
874                         svc->scheduler->update_service(svc);
875
876                 write_unlock_bh(&__ip_vs_svc_lock);
877                 return 0;
878         }
879
880         /*
881          * Allocate and initialize the dest structure
882          */
883         ret = ip_vs_new_dest(svc, udest, &dest);
884         if (ret) {
885                 return ret;
886         }
887
888         /*
889          * Add the dest entry into the list
890          */
891         atomic_inc(&dest->refcnt);
892
893         write_lock_bh(&__ip_vs_svc_lock);
894
895         /*
896          * Wait until all other svc users go away.
897          */
898         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
899
900         list_add(&dest->n_list, &svc->destinations);
901         svc->num_dests++;
902
903         /* call the update_service function of its scheduler */
904         if (svc->scheduler->update_service)
905                 svc->scheduler->update_service(svc);
906
907         write_unlock_bh(&__ip_vs_svc_lock);
908
909         LeaveFunction(2);
910
911         return 0;
912 }
913
914
915 /*
916  *      Edit a destination in the given service
917  */
918 static int
919 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
920 {
921         struct ip_vs_dest *dest;
922         __be32 daddr = udest->addr;
923         __be16 dport = udest->port;
924
925         EnterFunction(2);
926
927         if (udest->weight < 0) {
928                 IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
929                 return -ERANGE;
930         }
931
932         if (udest->l_threshold > udest->u_threshold) {
933                 IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than "
934                           "upper threshold\n");
935                 return -ERANGE;
936         }
937
938         /*
939          *  Lookup the destination list
940          */
941         dest = ip_vs_lookup_dest(svc, daddr, dport);
942         if (dest == NULL) {
943                 IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
944                 return -ENOENT;
945         }
946
947         __ip_vs_update_dest(svc, dest, udest);
948
949         write_lock_bh(&__ip_vs_svc_lock);
950
951         /* Wait until all other svc users go away */
952         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
953
954         /* call the update_service, because server weight may be changed */
955         if (svc->scheduler->update_service)
956                 svc->scheduler->update_service(svc);
957
958         write_unlock_bh(&__ip_vs_svc_lock);
959
960         LeaveFunction(2);
961
962         return 0;
963 }
964
965
966 /*
967  *      Delete a destination (must be already unlinked from the service)
968  */
969 static void __ip_vs_del_dest(struct ip_vs_dest *dest)
970 {
971         ip_vs_kill_estimator(&dest->stats);
972
973         /*
974          *  Remove it from the d-linked list with the real services.
975          */
976         write_lock_bh(&__ip_vs_rs_lock);
977         ip_vs_rs_unhash(dest);
978         write_unlock_bh(&__ip_vs_rs_lock);
979
980         /*
981          *  Decrease the refcnt of the dest, and free the dest
982          *  if nobody refers to it (refcnt=0). Otherwise, throw
983          *  the destination into the trash.
984          */
985         if (atomic_dec_and_test(&dest->refcnt)) {
986                 ip_vs_dst_reset(dest);
987                 /* simply decrease svc->refcnt here, let the caller check
988                    and release the service if nobody refers to it.
989                    Only user context can release destination and service,
990                    and only one user context can update virtual service at a
991                    time, so the operation here is OK */
992                 atomic_dec(&dest->svc->refcnt);
993                 kfree(dest);
994         } else {
995                 IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, "
996                           "dest->refcnt=%d\n",
997                           NIPQUAD(dest->addr.ip), ntohs(dest->port),
998                           atomic_read(&dest->refcnt));
999                 list_add(&dest->n_list, &ip_vs_dest_trash);
1000                 atomic_inc(&dest->refcnt);
1001         }
1002 }
1003
1004
1005 /*
1006  *      Unlink a destination from the given service
1007  */
1008 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1009                                 struct ip_vs_dest *dest,
1010                                 int svcupd)
1011 {
1012         dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1013
1014         /*
1015          *  Remove it from the d-linked destination list.
1016          */
1017         list_del(&dest->n_list);
1018         svc->num_dests--;
1019
1020         /*
1021          *  Call the update_service function of its scheduler
1022          */
1023         if (svcupd && svc->scheduler->update_service)
1024                         svc->scheduler->update_service(svc);
1025 }
1026
1027
1028 /*
1029  *      Delete a destination server in the given service
1030  */
1031 static int
1032 ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_dest_user *udest)
1033 {
1034         struct ip_vs_dest *dest;
1035         __be32 daddr = udest->addr;
1036         __be16 dport = udest->port;
1037
1038         EnterFunction(2);
1039
1040         dest = ip_vs_lookup_dest(svc, daddr, dport);
1041         if (dest == NULL) {
1042                 IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
1043                 return -ENOENT;
1044         }
1045
1046         write_lock_bh(&__ip_vs_svc_lock);
1047
1048         /*
1049          *      Wait until all other svc users go away.
1050          */
1051         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1052
1053         /*
1054          *      Unlink dest from the service
1055          */
1056         __ip_vs_unlink_dest(svc, dest, 1);
1057
1058         write_unlock_bh(&__ip_vs_svc_lock);
1059
1060         /*
1061          *      Delete the destination
1062          */
1063         __ip_vs_del_dest(dest);
1064
1065         LeaveFunction(2);
1066
1067         return 0;
1068 }
1069
1070
1071 /*
1072  *      Add a service into the service hash table
1073  */
1074 static int
1075 ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p)
1076 {
1077         int ret = 0;
1078         struct ip_vs_scheduler *sched = NULL;
1079         struct ip_vs_service *svc = NULL;
1080
1081         /* increase the module use count */
1082         ip_vs_use_count_inc();
1083
1084         /* Lookup the scheduler by 'u->sched_name' */
1085         sched = ip_vs_scheduler_get(u->sched_name);
1086         if (sched == NULL) {
1087                 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1088                            u->sched_name);
1089                 ret = -ENOENT;
1090                 goto out_mod_dec;
1091         }
1092
1093         svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
1094         if (svc == NULL) {
1095                 IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
1096                 ret = -ENOMEM;
1097                 goto out_err;
1098         }
1099
1100         /* I'm the first user of the service */
1101         atomic_set(&svc->usecnt, 1);
1102         atomic_set(&svc->refcnt, 0);
1103
1104         svc->protocol = u->protocol;
1105         svc->addr.ip = u->addr;
1106         svc->port = u->port;
1107         svc->fwmark = u->fwmark;
1108         svc->flags = u->flags;
1109         svc->timeout = u->timeout * HZ;
1110         svc->netmask = u->netmask;
1111
1112         INIT_LIST_HEAD(&svc->destinations);
1113         rwlock_init(&svc->sched_lock);
1114         spin_lock_init(&svc->stats.lock);
1115
1116         /* Bind the scheduler */
1117         ret = ip_vs_bind_scheduler(svc, sched);
1118         if (ret)
1119                 goto out_err;
1120         sched = NULL;
1121
1122         /* Update the virtual service counters */
1123         if (svc->port == FTPPORT)
1124                 atomic_inc(&ip_vs_ftpsvc_counter);
1125         else if (svc->port == 0)
1126                 atomic_inc(&ip_vs_nullsvc_counter);
1127
1128         ip_vs_new_estimator(&svc->stats);
1129         ip_vs_num_services++;
1130
1131         /* Hash the service into the service table */
1132         write_lock_bh(&__ip_vs_svc_lock);
1133         ip_vs_svc_hash(svc);
1134         write_unlock_bh(&__ip_vs_svc_lock);
1135
1136         *svc_p = svc;
1137         return 0;
1138
1139   out_err:
1140         if (svc != NULL) {
1141                 if (svc->scheduler)
1142                         ip_vs_unbind_scheduler(svc);
1143                 if (svc->inc) {
1144                         local_bh_disable();
1145                         ip_vs_app_inc_put(svc->inc);
1146                         local_bh_enable();
1147                 }
1148                 kfree(svc);
1149         }
1150         ip_vs_scheduler_put(sched);
1151
1152   out_mod_dec:
1153         /* decrease the module use count */
1154         ip_vs_use_count_dec();
1155
1156         return ret;
1157 }
1158
1159
1160 /*
1161  *      Edit a service and bind it with a new scheduler
1162  */
1163 static int
1164 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user *u)
1165 {
1166         struct ip_vs_scheduler *sched, *old_sched;
1167         int ret = 0;
1168
1169         /*
1170          * Lookup the scheduler, by 'u->sched_name'
1171          */
1172         sched = ip_vs_scheduler_get(u->sched_name);
1173         if (sched == NULL) {
1174                 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1175                            u->sched_name);
1176                 return -ENOENT;
1177         }
1178         old_sched = sched;
1179
1180         write_lock_bh(&__ip_vs_svc_lock);
1181
1182         /*
1183          * Wait until all other svc users go away.
1184          */
1185         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1186
1187         /*
1188          * Set the flags and timeout value
1189          */
1190         svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1191         svc->timeout = u->timeout * HZ;
1192         svc->netmask = u->netmask;
1193
1194         old_sched = svc->scheduler;
1195         if (sched != old_sched) {
1196                 /*
1197                  * Unbind the old scheduler
1198                  */
1199                 if ((ret = ip_vs_unbind_scheduler(svc))) {
1200                         old_sched = sched;
1201                         goto out;
1202                 }
1203
1204                 /*
1205                  * Bind the new scheduler
1206                  */
1207                 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1208                         /*
1209                          * If ip_vs_bind_scheduler fails, restore the old
1210                          * scheduler.
1211                          * The main reason of failure is out of memory.
1212                          *
1213                          * The question is if the old scheduler can be
1214                          * restored all the time. TODO: if it cannot be
1215                          * restored some time, we must delete the service,
1216                          * otherwise the system may crash.
1217                          */
1218                         ip_vs_bind_scheduler(svc, old_sched);
1219                         old_sched = sched;
1220                         goto out;
1221                 }
1222         }
1223
1224   out:
1225         write_unlock_bh(&__ip_vs_svc_lock);
1226
1227         if (old_sched)
1228                 ip_vs_scheduler_put(old_sched);
1229
1230         return ret;
1231 }
1232
1233
1234 /*
1235  *      Delete a service from the service list
1236  *      - The service must be unlinked, unlocked and not referenced!
1237  *      - We are called under _bh lock
1238  */
1239 static void __ip_vs_del_service(struct ip_vs_service *svc)
1240 {
1241         struct ip_vs_dest *dest, *nxt;
1242         struct ip_vs_scheduler *old_sched;
1243
1244         ip_vs_num_services--;
1245         ip_vs_kill_estimator(&svc->stats);
1246
1247         /* Unbind scheduler */
1248         old_sched = svc->scheduler;
1249         ip_vs_unbind_scheduler(svc);
1250         if (old_sched)
1251                 ip_vs_scheduler_put(old_sched);
1252
1253         /* Unbind app inc */
1254         if (svc->inc) {
1255                 ip_vs_app_inc_put(svc->inc);
1256                 svc->inc = NULL;
1257         }
1258
1259         /*
1260          *    Unlink the whole destination list
1261          */
1262         list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1263                 __ip_vs_unlink_dest(svc, dest, 0);
1264                 __ip_vs_del_dest(dest);
1265         }
1266
1267         /*
1268          *    Update the virtual service counters
1269          */
1270         if (svc->port == FTPPORT)
1271                 atomic_dec(&ip_vs_ftpsvc_counter);
1272         else if (svc->port == 0)
1273                 atomic_dec(&ip_vs_nullsvc_counter);
1274
1275         /*
1276          *    Free the service if nobody refers to it
1277          */
1278         if (atomic_read(&svc->refcnt) == 0)
1279                 kfree(svc);
1280
1281         /* decrease the module use count */
1282         ip_vs_use_count_dec();
1283 }
1284
1285 /*
1286  *      Delete a service from the service list
1287  */
1288 static int ip_vs_del_service(struct ip_vs_service *svc)
1289 {
1290         if (svc == NULL)
1291                 return -EEXIST;
1292
1293         /*
1294          * Unhash it from the service table
1295          */
1296         write_lock_bh(&__ip_vs_svc_lock);
1297
1298         ip_vs_svc_unhash(svc);
1299
1300         /*
1301          * Wait until all the svc users go away.
1302          */
1303         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1304
1305         __ip_vs_del_service(svc);
1306
1307         write_unlock_bh(&__ip_vs_svc_lock);
1308
1309         return 0;
1310 }
1311
1312
1313 /*
1314  *      Flush all the virtual services
1315  */
1316 static int ip_vs_flush(void)
1317 {
1318         int idx;
1319         struct ip_vs_service *svc, *nxt;
1320
1321         /*
1322          * Flush the service table hashed by <protocol,addr,port>
1323          */
1324         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1325                 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1326                         write_lock_bh(&__ip_vs_svc_lock);
1327                         ip_vs_svc_unhash(svc);
1328                         /*
1329                          * Wait until all the svc users go away.
1330                          */
1331                         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1332                         __ip_vs_del_service(svc);
1333                         write_unlock_bh(&__ip_vs_svc_lock);
1334                 }
1335         }
1336
1337         /*
1338          * Flush the service table hashed by fwmark
1339          */
1340         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1341                 list_for_each_entry_safe(svc, nxt,
1342                                          &ip_vs_svc_fwm_table[idx], f_list) {
1343                         write_lock_bh(&__ip_vs_svc_lock);
1344                         ip_vs_svc_unhash(svc);
1345                         /*
1346                          * Wait until all the svc users go away.
1347                          */
1348                         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1349                         __ip_vs_del_service(svc);
1350                         write_unlock_bh(&__ip_vs_svc_lock);
1351                 }
1352         }
1353
1354         return 0;
1355 }
1356
1357
1358 /*
1359  *      Zero counters in a service or all services
1360  */
1361 static int ip_vs_zero_service(struct ip_vs_service *svc)
1362 {
1363         struct ip_vs_dest *dest;
1364
1365         write_lock_bh(&__ip_vs_svc_lock);
1366         list_for_each_entry(dest, &svc->destinations, n_list) {
1367                 ip_vs_zero_stats(&dest->stats);
1368         }
1369         ip_vs_zero_stats(&svc->stats);
1370         write_unlock_bh(&__ip_vs_svc_lock);
1371         return 0;
1372 }
1373
1374 static int ip_vs_zero_all(void)
1375 {
1376         int idx;
1377         struct ip_vs_service *svc;
1378
1379         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1380                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1381                         ip_vs_zero_service(svc);
1382                 }
1383         }
1384
1385         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1386                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1387                         ip_vs_zero_service(svc);
1388                 }
1389         }
1390
1391         ip_vs_zero_stats(&ip_vs_stats);
1392         return 0;
1393 }
1394
1395
1396 static int
1397 proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
1398                      void __user *buffer, size_t *lenp, loff_t *ppos)
1399 {
1400         int *valp = table->data;
1401         int val = *valp;
1402         int rc;
1403
1404         rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1405         if (write && (*valp != val)) {
1406                 if ((*valp < 0) || (*valp > 3)) {
1407                         /* Restore the correct value */
1408                         *valp = val;
1409                 } else {
1410                         update_defense_level();
1411                 }
1412         }
1413         return rc;
1414 }
1415
1416
1417 static int
1418 proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
1419                        void __user *buffer, size_t *lenp, loff_t *ppos)
1420 {
1421         int *valp = table->data;
1422         int val[2];
1423         int rc;
1424
1425         /* backup the value first */
1426         memcpy(val, valp, sizeof(val));
1427
1428         rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1429         if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1430                 /* Restore the correct value */
1431                 memcpy(valp, val, sizeof(val));
1432         }
1433         return rc;
1434 }
1435
1436
1437 /*
1438  *      IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1439  */
1440
1441 static struct ctl_table vs_vars[] = {
1442         {
1443                 .procname       = "amemthresh",
1444                 .data           = &sysctl_ip_vs_amemthresh,
1445                 .maxlen         = sizeof(int),
1446                 .mode           = 0644,
1447                 .proc_handler   = &proc_dointvec,
1448         },
1449 #ifdef CONFIG_IP_VS_DEBUG
1450         {
1451                 .procname       = "debug_level",
1452                 .data           = &sysctl_ip_vs_debug_level,
1453                 .maxlen         = sizeof(int),
1454                 .mode           = 0644,
1455                 .proc_handler   = &proc_dointvec,
1456         },
1457 #endif
1458         {
1459                 .procname       = "am_droprate",
1460                 .data           = &sysctl_ip_vs_am_droprate,
1461                 .maxlen         = sizeof(int),
1462                 .mode           = 0644,
1463                 .proc_handler   = &proc_dointvec,
1464         },
1465         {
1466                 .procname       = "drop_entry",
1467                 .data           = &sysctl_ip_vs_drop_entry,
1468                 .maxlen         = sizeof(int),
1469                 .mode           = 0644,
1470                 .proc_handler   = &proc_do_defense_mode,
1471         },
1472         {
1473                 .procname       = "drop_packet",
1474                 .data           = &sysctl_ip_vs_drop_packet,
1475                 .maxlen         = sizeof(int),
1476                 .mode           = 0644,
1477                 .proc_handler   = &proc_do_defense_mode,
1478         },
1479         {
1480                 .procname       = "secure_tcp",
1481                 .data           = &sysctl_ip_vs_secure_tcp,
1482                 .maxlen         = sizeof(int),
1483                 .mode           = 0644,
1484                 .proc_handler   = &proc_do_defense_mode,
1485         },
1486 #if 0
1487         {
1488                 .procname       = "timeout_established",
1489                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1490                 .maxlen         = sizeof(int),
1491                 .mode           = 0644,
1492                 .proc_handler   = &proc_dointvec_jiffies,
1493         },
1494         {
1495                 .procname       = "timeout_synsent",
1496                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1497                 .maxlen         = sizeof(int),
1498                 .mode           = 0644,
1499                 .proc_handler   = &proc_dointvec_jiffies,
1500         },
1501         {
1502                 .procname       = "timeout_synrecv",
1503                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1504                 .maxlen         = sizeof(int),
1505                 .mode           = 0644,
1506                 .proc_handler   = &proc_dointvec_jiffies,
1507         },
1508         {
1509                 .procname       = "timeout_finwait",
1510                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1511                 .maxlen         = sizeof(int),
1512                 .mode           = 0644,
1513                 .proc_handler   = &proc_dointvec_jiffies,
1514         },
1515         {
1516                 .procname       = "timeout_timewait",
1517                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1518                 .maxlen         = sizeof(int),
1519                 .mode           = 0644,
1520                 .proc_handler   = &proc_dointvec_jiffies,
1521         },
1522         {
1523                 .procname       = "timeout_close",
1524                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1525                 .maxlen         = sizeof(int),
1526                 .mode           = 0644,
1527                 .proc_handler   = &proc_dointvec_jiffies,
1528         },
1529         {
1530                 .procname       = "timeout_closewait",
1531                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1532                 .maxlen         = sizeof(int),
1533                 .mode           = 0644,
1534                 .proc_handler   = &proc_dointvec_jiffies,
1535         },
1536         {
1537                 .procname       = "timeout_lastack",
1538                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1539                 .maxlen         = sizeof(int),
1540                 .mode           = 0644,
1541                 .proc_handler   = &proc_dointvec_jiffies,
1542         },
1543         {
1544                 .procname       = "timeout_listen",
1545                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1546                 .maxlen         = sizeof(int),
1547                 .mode           = 0644,
1548                 .proc_handler   = &proc_dointvec_jiffies,
1549         },
1550         {
1551                 .procname       = "timeout_synack",
1552                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1553                 .maxlen         = sizeof(int),
1554                 .mode           = 0644,
1555                 .proc_handler   = &proc_dointvec_jiffies,
1556         },
1557         {
1558                 .procname       = "timeout_udp",
1559                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1560                 .maxlen         = sizeof(int),
1561                 .mode           = 0644,
1562                 .proc_handler   = &proc_dointvec_jiffies,
1563         },
1564         {
1565                 .procname       = "timeout_icmp",
1566                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1567                 .maxlen         = sizeof(int),
1568                 .mode           = 0644,
1569                 .proc_handler   = &proc_dointvec_jiffies,
1570         },
1571 #endif
1572         {
1573                 .procname       = "cache_bypass",
1574                 .data           = &sysctl_ip_vs_cache_bypass,
1575                 .maxlen         = sizeof(int),
1576                 .mode           = 0644,
1577                 .proc_handler   = &proc_dointvec,
1578         },
1579         {
1580                 .procname       = "expire_nodest_conn",
1581                 .data           = &sysctl_ip_vs_expire_nodest_conn,
1582                 .maxlen         = sizeof(int),
1583                 .mode           = 0644,
1584                 .proc_handler   = &proc_dointvec,
1585         },
1586         {
1587                 .procname       = "expire_quiescent_template",
1588                 .data           = &sysctl_ip_vs_expire_quiescent_template,
1589                 .maxlen         = sizeof(int),
1590                 .mode           = 0644,
1591                 .proc_handler   = &proc_dointvec,
1592         },
1593         {
1594                 .procname       = "sync_threshold",
1595                 .data           = &sysctl_ip_vs_sync_threshold,
1596                 .maxlen         = sizeof(sysctl_ip_vs_sync_threshold),
1597                 .mode           = 0644,
1598                 .proc_handler   = &proc_do_sync_threshold,
1599         },
1600         {
1601                 .procname       = "nat_icmp_send",
1602                 .data           = &sysctl_ip_vs_nat_icmp_send,
1603                 .maxlen         = sizeof(int),
1604                 .mode           = 0644,
1605                 .proc_handler   = &proc_dointvec,
1606         },
1607         { .ctl_name = 0 }
1608 };
1609
1610 const struct ctl_path net_vs_ctl_path[] = {
1611         { .procname = "net", .ctl_name = CTL_NET, },
1612         { .procname = "ipv4", .ctl_name = NET_IPV4, },
1613         { .procname = "vs", },
1614         { }
1615 };
1616 EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1617
/* presumably the handle of the registered vs_vars sysctl table - confirm */
static struct ctl_table_header *sysctl_header;
1619
1620 #ifdef CONFIG_PROC_FS
1621
/*
 *	Private seq_file cursor for the service listing.
 */
struct ip_vs_iter {
	/* table being walked: ip_vs_svc_table or ip_vs_svc_fwm_table */
	struct list_head *table;
	/* index of the current hash bucket within that table */
	int bucket;
};
1626
1627 /*
1628  *      Write the contents of the VS rule table to a PROCfs file.
1629  *      (It is kept just for backward compatibility)
1630  */
1631 static inline const char *ip_vs_fwd_name(unsigned flags)
1632 {
1633         switch (flags & IP_VS_CONN_F_FWD_MASK) {
1634         case IP_VS_CONN_F_LOCALNODE:
1635                 return "Local";
1636         case IP_VS_CONN_F_TUNNEL:
1637                 return "Tunnel";
1638         case IP_VS_CONN_F_DROUTE:
1639                 return "Route";
1640         default:
1641                 return "Masq";
1642         }
1643 }
1644
1645
1646 /* Get the Nth entry in the two lists */
1647 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1648 {
1649         struct ip_vs_iter *iter = seq->private;
1650         int idx;
1651         struct ip_vs_service *svc;
1652
1653         /* look in hash by protocol */
1654         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1655                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1656                         if (pos-- == 0){
1657                                 iter->table = ip_vs_svc_table;
1658                                 iter->bucket = idx;
1659                                 return svc;
1660                         }
1661                 }
1662         }
1663
1664         /* keep looking in fwmark */
1665         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1666                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1667                         if (pos-- == 0) {
1668                                 iter->table = ip_vs_svc_fwm_table;
1669                                 iter->bucket = idx;
1670                                 return svc;
1671                         }
1672                 }
1673         }
1674
1675         return NULL;
1676 }
1677
1678 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1679 {
1680
1681         read_lock_bh(&__ip_vs_svc_lock);
1682         return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1683 }
1684
1685
1686 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1687 {
1688         struct list_head *e;
1689         struct ip_vs_iter *iter;
1690         struct ip_vs_service *svc;
1691
1692         ++*pos;
1693         if (v == SEQ_START_TOKEN)
1694                 return ip_vs_info_array(seq,0);
1695
1696         svc = v;
1697         iter = seq->private;
1698
1699         if (iter->table == ip_vs_svc_table) {
1700                 /* next service in table hashed by protocol */
1701                 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1702                         return list_entry(e, struct ip_vs_service, s_list);
1703
1704
1705                 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1706                         list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1707                                             s_list) {
1708                                 return svc;
1709                         }
1710                 }
1711
1712                 iter->table = ip_vs_svc_fwm_table;
1713                 iter->bucket = -1;
1714                 goto scan_fwmark;
1715         }
1716
1717         /* next service in hashed by fwmark */
1718         if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1719                 return list_entry(e, struct ip_vs_service, f_list);
1720
1721  scan_fwmark:
1722         while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1723                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1724                                     f_list)
1725                         return svc;
1726         }
1727
1728         return NULL;
1729 }
1730
1731 static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1732 {
1733         read_unlock_bh(&__ip_vs_svc_lock);
1734 }
1735
1736
1737 static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1738 {
1739         if (v == SEQ_START_TOKEN) {
1740                 seq_printf(seq,
1741                         "IP Virtual Server version %d.%d.%d (size=%d)\n",
1742                         NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
1743                 seq_puts(seq,
1744                          "Prot LocalAddress:Port Scheduler Flags\n");
1745                 seq_puts(seq,
1746                          "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1747         } else {
1748                 const struct ip_vs_service *svc = v;
1749                 const struct ip_vs_iter *iter = seq->private;
1750                 const struct ip_vs_dest *dest;
1751
1752                 if (iter->table == ip_vs_svc_table)
1753                         seq_printf(seq, "%s  %08X:%04X %s ",
1754                                    ip_vs_proto_name(svc->protocol),
1755                                    ntohl(svc->addr.ip),
1756                                    ntohs(svc->port),
1757                                    svc->scheduler->name);
1758                 else
1759                         seq_printf(seq, "FWM  %08X %s ",
1760                                    svc->fwmark, svc->scheduler->name);
1761
1762                 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1763                         seq_printf(seq, "persistent %d %08X\n",
1764                                 svc->timeout,
1765                                 ntohl(svc->netmask));
1766                 else
1767                         seq_putc(seq, '\n');
1768
1769                 list_for_each_entry(dest, &svc->destinations, n_list) {
1770                         seq_printf(seq,
1771                                    "  -> %08X:%04X      %-7s %-6d %-10d %-10d\n",
1772                                    ntohl(dest->addr.ip), ntohs(dest->port),
1773                                    ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1774                                    atomic_read(&dest->weight),
1775                                    atomic_read(&dest->activeconns),
1776                                    atomic_read(&dest->inactconns));
1777                 }
1778         }
1779         return 0;
1780 }
1781
1782 static const struct seq_operations ip_vs_info_seq_ops = {
1783         .start = ip_vs_info_seq_start,
1784         .next  = ip_vs_info_seq_next,
1785         .stop  = ip_vs_info_seq_stop,
1786         .show  = ip_vs_info_seq_show,
1787 };
1788
1789 static int ip_vs_info_open(struct inode *inode, struct file *file)
1790 {
1791         return seq_open_private(file, &ip_vs_info_seq_ops,
1792                         sizeof(struct ip_vs_iter));
1793 }
1794
1795 static const struct file_operations ip_vs_info_fops = {
1796         .owner   = THIS_MODULE,
1797         .open    = ip_vs_info_open,
1798         .read    = seq_read,
1799         .llseek  = seq_lseek,
1800         .release = seq_release_private,
1801 };
1802
1803 #endif
1804
1805 struct ip_vs_stats ip_vs_stats = {
1806         .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
1807 };
1808
1809 #ifdef CONFIG_PROC_FS
1810 static int ip_vs_stats_show(struct seq_file *seq, void *v)
1811 {
1812
1813 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
1814         seq_puts(seq,
1815                  "   Total Incoming Outgoing         Incoming         Outgoing\n");
1816         seq_printf(seq,
1817                    "   Conns  Packets  Packets            Bytes            Bytes\n");
1818
1819         spin_lock_bh(&ip_vs_stats.lock);
1820         seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.conns,
1821                    ip_vs_stats.inpkts, ip_vs_stats.outpkts,
1822                    (unsigned long long) ip_vs_stats.inbytes,
1823                    (unsigned long long) ip_vs_stats.outbytes);
1824
1825 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1826         seq_puts(seq,
1827                    " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
1828         seq_printf(seq,"%8X %8X %8X %16X %16X\n",
1829                         ip_vs_stats.cps,
1830                         ip_vs_stats.inpps,
1831                         ip_vs_stats.outpps,
1832                         ip_vs_stats.inbps,
1833                         ip_vs_stats.outbps);
1834         spin_unlock_bh(&ip_vs_stats.lock);
1835
1836         return 0;
1837 }
1838
1839 static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1840 {
1841         return single_open(file, ip_vs_stats_show, NULL);
1842 }
1843
1844 static const struct file_operations ip_vs_stats_fops = {
1845         .owner = THIS_MODULE,
1846         .open = ip_vs_stats_seq_open,
1847         .read = seq_read,
1848         .llseek = seq_lseek,
1849         .release = single_release,
1850 };
1851
1852 #endif
1853
1854 /*
1855  *      Set timeout values for tcp tcpfin udp in the timeout_table.
1856  */
1857 static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
1858 {
1859         IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
1860                   u->tcp_timeout,
1861                   u->tcp_fin_timeout,
1862                   u->udp_timeout);
1863
1864 #ifdef CONFIG_IP_VS_PROTO_TCP
1865         if (u->tcp_timeout) {
1866                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
1867                         = u->tcp_timeout * HZ;
1868         }
1869
1870         if (u->tcp_fin_timeout) {
1871                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
1872                         = u->tcp_fin_timeout * HZ;
1873         }
1874 #endif
1875
1876 #ifdef CONFIG_IP_VS_PROTO_UDP
1877         if (u->udp_timeout) {
1878                 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
1879                         = u->udp_timeout * HZ;
1880         }
1881 #endif
1882         return 0;
1883 }
1884
1885
/*
 * SET_CMDID() turns a set-command number into its offset from
 * IP_VS_BASE_CTL, which is how set_arglen[] is indexed. Both the argument
 * and the whole expansion are parenthesized so the macro is safe inside
 * larger expressions.
 */
#define SET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
/* Payload sizes of the different setsockopt argument layouts. */
#define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
				 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
/* Used to size the on-stack argument buffer in do_ip_vs_set_ctl(). */
#define MAX_ARG_LEN		SVCDEST_ARG_LEN
1893
1894 static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
1895         [SET_CMDID(IP_VS_SO_SET_ADD)]           = SERVICE_ARG_LEN,
1896         [SET_CMDID(IP_VS_SO_SET_EDIT)]          = SERVICE_ARG_LEN,
1897         [SET_CMDID(IP_VS_SO_SET_DEL)]           = SERVICE_ARG_LEN,
1898         [SET_CMDID(IP_VS_SO_SET_FLUSH)]         = 0,
1899         [SET_CMDID(IP_VS_SO_SET_ADDDEST)]       = SVCDEST_ARG_LEN,
1900         [SET_CMDID(IP_VS_SO_SET_DELDEST)]       = SVCDEST_ARG_LEN,
1901         [SET_CMDID(IP_VS_SO_SET_EDITDEST)]      = SVCDEST_ARG_LEN,
1902         [SET_CMDID(IP_VS_SO_SET_TIMEOUT)]       = TIMEOUT_ARG_LEN,
1903         [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)]   = DAEMON_ARG_LEN,
1904         [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)]    = DAEMON_ARG_LEN,
1905         [SET_CMDID(IP_VS_SO_SET_ZERO)]          = SERVICE_ARG_LEN,
1906 };
1907
1908 static int
1909 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1910 {
1911         int ret;
1912         unsigned char arg[MAX_ARG_LEN];
1913         struct ip_vs_service_user *usvc;
1914         struct ip_vs_service *svc;
1915         struct ip_vs_dest_user *udest;
1916
1917         if (!capable(CAP_NET_ADMIN))
1918                 return -EPERM;
1919
1920         if (len != set_arglen[SET_CMDID(cmd)]) {
1921                 IP_VS_ERR("set_ctl: len %u != %u\n",
1922                           len, set_arglen[SET_CMDID(cmd)]);
1923                 return -EINVAL;
1924         }
1925
1926         if (copy_from_user(arg, user, len) != 0)
1927                 return -EFAULT;
1928
1929         /* increase the module use count */
1930         ip_vs_use_count_inc();
1931
1932         if (mutex_lock_interruptible(&__ip_vs_mutex)) {
1933                 ret = -ERESTARTSYS;
1934                 goto out_dec;
1935         }
1936
1937         if (cmd == IP_VS_SO_SET_FLUSH) {
1938                 /* Flush the virtual service */
1939                 ret = ip_vs_flush();
1940                 goto out_unlock;
1941         } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
1942                 /* Set timeout values for (tcp tcpfin udp) */
1943                 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
1944                 goto out_unlock;
1945         } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
1946                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
1947                 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
1948                 goto out_unlock;
1949         } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
1950                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
1951                 ret = stop_sync_thread(dm->state);
1952                 goto out_unlock;
1953         }
1954
1955         usvc = (struct ip_vs_service_user *)arg;
1956         udest = (struct ip_vs_dest_user *)(usvc + 1);
1957
1958         if (cmd == IP_VS_SO_SET_ZERO) {
1959                 /* if no service address is set, zero counters in all */
1960                 if (!usvc->fwmark && !usvc->addr && !usvc->port) {
1961                         ret = ip_vs_zero_all();
1962                         goto out_unlock;
1963                 }
1964         }
1965
1966         /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
1967         if (usvc->protocol!=IPPROTO_TCP && usvc->protocol!=IPPROTO_UDP) {
1968                 IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
1969                           usvc->protocol, NIPQUAD(usvc->addr),
1970                           ntohs(usvc->port), usvc->sched_name);
1971                 ret = -EFAULT;
1972                 goto out_unlock;
1973         }
1974
1975         /* Lookup the exact service by <protocol, addr, port> or fwmark */
1976         if (usvc->fwmark == 0)
1977                 svc = __ip_vs_service_get(usvc->protocol,
1978                                           usvc->addr, usvc->port);
1979         else
1980                 svc = __ip_vs_svc_fwm_get(usvc->fwmark);
1981
1982         if (cmd != IP_VS_SO_SET_ADD
1983             && (svc == NULL || svc->protocol != usvc->protocol)) {
1984                 ret = -ESRCH;
1985                 goto out_unlock;
1986         }
1987
1988         switch (cmd) {
1989         case IP_VS_SO_SET_ADD:
1990                 if (svc != NULL)
1991                         ret = -EEXIST;
1992                 else
1993                         ret = ip_vs_add_service(usvc, &svc);
1994                 break;
1995         case IP_VS_SO_SET_EDIT:
1996                 ret = ip_vs_edit_service(svc, usvc);
1997                 break;
1998         case IP_VS_SO_SET_DEL:
1999                 ret = ip_vs_del_service(svc);
2000                 if (!ret)
2001                         goto out_unlock;
2002                 break;
2003         case IP_VS_SO_SET_ZERO:
2004                 ret = ip_vs_zero_service(svc);
2005                 break;
2006         case IP_VS_SO_SET_ADDDEST:
2007                 ret = ip_vs_add_dest(svc, udest);
2008                 break;
2009         case IP_VS_SO_SET_EDITDEST:
2010                 ret = ip_vs_edit_dest(svc, udest);
2011                 break;
2012         case IP_VS_SO_SET_DELDEST:
2013                 ret = ip_vs_del_dest(svc, udest);
2014                 break;
2015         default:
2016                 ret = -EINVAL;
2017         }
2018
2019         if (svc)
2020                 ip_vs_service_put(svc);
2021
2022   out_unlock:
2023         mutex_unlock(&__ip_vs_mutex);
2024   out_dec:
2025         /* decrease the module use count */
2026         ip_vs_use_count_dec();
2027
2028         return ret;
2029 }
2030
2031
2032 static void
2033 ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2034 {
2035         spin_lock_bh(&src->lock);
2036         memcpy(dst, src, (char*)&src->lock - (char*)src);
2037         spin_unlock_bh(&src->lock);
2038 }
2039
2040 static void
2041 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2042 {
2043         dst->protocol = src->protocol;
2044         dst->addr = src->addr.ip;
2045         dst->port = src->port;
2046         dst->fwmark = src->fwmark;
2047         strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2048         dst->flags = src->flags;
2049         dst->timeout = src->timeout / HZ;
2050         dst->netmask = src->netmask;
2051         dst->num_dests = src->num_dests;
2052         ip_vs_copy_stats(&dst->stats, &src->stats);
2053 }
2054
2055 static inline int
2056 __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2057                             struct ip_vs_get_services __user *uptr)
2058 {
2059         int idx, count=0;
2060         struct ip_vs_service *svc;
2061         struct ip_vs_service_entry entry;
2062         int ret = 0;
2063
2064         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2065                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2066                         if (count >= get->num_services)
2067                                 goto out;
2068                         memset(&entry, 0, sizeof(entry));
2069                         ip_vs_copy_service(&entry, svc);
2070                         if (copy_to_user(&uptr->entrytable[count],
2071                                          &entry, sizeof(entry))) {
2072                                 ret = -EFAULT;
2073                                 goto out;
2074                         }
2075                         count++;
2076                 }
2077         }
2078
2079         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2080                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2081                         if (count >= get->num_services)
2082                                 goto out;
2083                         memset(&entry, 0, sizeof(entry));
2084                         ip_vs_copy_service(&entry, svc);
2085                         if (copy_to_user(&uptr->entrytable[count],
2086                                          &entry, sizeof(entry))) {
2087                                 ret = -EFAULT;
2088                                 goto out;
2089                         }
2090                         count++;
2091                 }
2092         }
2093   out:
2094         return ret;
2095 }
2096
2097 static inline int
2098 __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2099                          struct ip_vs_get_dests __user *uptr)
2100 {
2101         struct ip_vs_service *svc;
2102         int ret = 0;
2103
2104         if (get->fwmark)
2105                 svc = __ip_vs_svc_fwm_get(get->fwmark);
2106         else
2107                 svc = __ip_vs_service_get(get->protocol,
2108                                           get->addr, get->port);
2109         if (svc) {
2110                 int count = 0;
2111                 struct ip_vs_dest *dest;
2112                 struct ip_vs_dest_entry entry;
2113
2114                 list_for_each_entry(dest, &svc->destinations, n_list) {
2115                         if (count >= get->num_dests)
2116                                 break;
2117
2118                         entry.addr = dest->addr.ip;
2119                         entry.port = dest->port;
2120                         entry.conn_flags = atomic_read(&dest->conn_flags);
2121                         entry.weight = atomic_read(&dest->weight);
2122                         entry.u_threshold = dest->u_threshold;
2123                         entry.l_threshold = dest->l_threshold;
2124                         entry.activeconns = atomic_read(&dest->activeconns);
2125                         entry.inactconns = atomic_read(&dest->inactconns);
2126                         entry.persistconns = atomic_read(&dest->persistconns);
2127                         ip_vs_copy_stats(&entry.stats, &dest->stats);
2128                         if (copy_to_user(&uptr->entrytable[count],
2129                                          &entry, sizeof(entry))) {
2130                                 ret = -EFAULT;
2131                                 break;
2132                         }
2133                         count++;
2134                 }
2135                 ip_vs_service_put(svc);
2136         } else
2137                 ret = -ESRCH;
2138         return ret;
2139 }
2140
2141 static inline void
2142 __ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2143 {
2144 #ifdef CONFIG_IP_VS_PROTO_TCP
2145         u->tcp_timeout =
2146                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2147         u->tcp_fin_timeout =
2148                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2149 #endif
2150 #ifdef CONFIG_IP_VS_PROTO_UDP
2151         u->udp_timeout =
2152                 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2153 #endif
2154 }
2155
2156
/*
 * GET_CMDID() turns a get-command number into its offset from
 * IP_VS_BASE_CTL, which is how get_arglen[] is indexed. Both the argument
 * and the whole expansion are parenthesized so the macro is safe inside
 * larger expressions.
 */
#define GET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
/* Minimum getsockopt argument sizes of the different request layouts. */
#define GET_INFO_ARG_LEN	(sizeof(struct ip_vs_getinfo))
#define GET_SERVICES_ARG_LEN	(sizeof(struct ip_vs_get_services))
#define GET_SERVICE_ARG_LEN	(sizeof(struct ip_vs_service_entry))
#define GET_DESTS_ARG_LEN	(sizeof(struct ip_vs_get_dests))
#define GET_TIMEOUT_ARG_LEN	(sizeof(struct ip_vs_timeout_user))
/* one slot for the master daemon and one for the backup daemon */
#define GET_DAEMON_ARG_LEN	(sizeof(struct ip_vs_daemon_user) * 2)
2164
2165 static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
2166         [GET_CMDID(IP_VS_SO_GET_VERSION)]       = 64,
2167         [GET_CMDID(IP_VS_SO_GET_INFO)]          = GET_INFO_ARG_LEN,
2168         [GET_CMDID(IP_VS_SO_GET_SERVICES)]      = GET_SERVICES_ARG_LEN,
2169         [GET_CMDID(IP_VS_SO_GET_SERVICE)]       = GET_SERVICE_ARG_LEN,
2170         [GET_CMDID(IP_VS_SO_GET_DESTS)]         = GET_DESTS_ARG_LEN,
2171         [GET_CMDID(IP_VS_SO_GET_TIMEOUT)]       = GET_TIMEOUT_ARG_LEN,
2172         [GET_CMDID(IP_VS_SO_GET_DAEMON)]        = GET_DAEMON_ARG_LEN,
2173 };
2174
2175 static int
2176 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2177 {
2178         unsigned char arg[128];
2179         int ret = 0;
2180
2181         if (!capable(CAP_NET_ADMIN))
2182                 return -EPERM;
2183
2184         if (*len < get_arglen[GET_CMDID(cmd)]) {
2185                 IP_VS_ERR("get_ctl: len %u < %u\n",
2186                           *len, get_arglen[GET_CMDID(cmd)]);
2187                 return -EINVAL;
2188         }
2189
2190         if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0)
2191                 return -EFAULT;
2192
2193         if (mutex_lock_interruptible(&__ip_vs_mutex))
2194                 return -ERESTARTSYS;
2195
2196         switch (cmd) {
2197         case IP_VS_SO_GET_VERSION:
2198         {
2199                 char buf[64];
2200
2201                 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2202                         NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
2203                 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2204                         ret = -EFAULT;
2205                         goto out;
2206                 }
2207                 *len = strlen(buf)+1;
2208         }
2209         break;
2210
2211         case IP_VS_SO_GET_INFO:
2212         {
2213                 struct ip_vs_getinfo info;
2214                 info.version = IP_VS_VERSION_CODE;
2215                 info.size = IP_VS_CONN_TAB_SIZE;
2216                 info.num_services = ip_vs_num_services;
2217                 if (copy_to_user(user, &info, sizeof(info)) != 0)
2218                         ret = -EFAULT;
2219         }
2220         break;
2221
2222         case IP_VS_SO_GET_SERVICES:
2223         {
2224                 struct ip_vs_get_services *get;
2225                 int size;
2226
2227                 get = (struct ip_vs_get_services *)arg;
2228                 size = sizeof(*get) +
2229                         sizeof(struct ip_vs_service_entry) * get->num_services;
2230                 if (*len != size) {
2231                         IP_VS_ERR("length: %u != %u\n", *len, size);
2232                         ret = -EINVAL;
2233                         goto out;
2234                 }
2235                 ret = __ip_vs_get_service_entries(get, user);
2236         }
2237         break;
2238
2239         case IP_VS_SO_GET_SERVICE:
2240         {
2241                 struct ip_vs_service_entry *entry;
2242                 struct ip_vs_service *svc;
2243
2244                 entry = (struct ip_vs_service_entry *)arg;
2245                 if (entry->fwmark)
2246                         svc = __ip_vs_svc_fwm_get(entry->fwmark);
2247                 else
2248                         svc = __ip_vs_service_get(entry->protocol,
2249                                                   entry->addr, entry->port);
2250                 if (svc) {
2251                         ip_vs_copy_service(entry, svc);
2252                         if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2253                                 ret = -EFAULT;
2254                         ip_vs_service_put(svc);
2255                 } else
2256                         ret = -ESRCH;
2257         }
2258         break;
2259
2260         case IP_VS_SO_GET_DESTS:
2261         {
2262                 struct ip_vs_get_dests *get;
2263                 int size;
2264
2265                 get = (struct ip_vs_get_dests *)arg;
2266                 size = sizeof(*get) +
2267                         sizeof(struct ip_vs_dest_entry) * get->num_dests;
2268                 if (*len != size) {
2269                         IP_VS_ERR("length: %u != %u\n", *len, size);
2270                         ret = -EINVAL;
2271                         goto out;
2272                 }
2273                 ret = __ip_vs_get_dest_entries(get, user);
2274         }
2275         break;
2276
2277         case IP_VS_SO_GET_TIMEOUT:
2278         {
2279                 struct ip_vs_timeout_user t;
2280
2281                 __ip_vs_get_timeouts(&t);
2282                 if (copy_to_user(user, &t, sizeof(t)) != 0)
2283                         ret = -EFAULT;
2284         }
2285         break;
2286
2287         case IP_VS_SO_GET_DAEMON:
2288         {
2289                 struct ip_vs_daemon_user d[2];
2290
2291                 memset(&d, 0, sizeof(d));
2292                 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2293                         d[0].state = IP_VS_STATE_MASTER;
2294                         strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
2295                         d[0].syncid = ip_vs_master_syncid;
2296                 }
2297                 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2298                         d[1].state = IP_VS_STATE_BACKUP;
2299                         strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
2300                         d[1].syncid = ip_vs_backup_syncid;
2301                 }
2302                 if (copy_to_user(user, &d, sizeof(d)) != 0)
2303                         ret = -EFAULT;
2304         }
2305         break;
2306
2307         default:
2308                 ret = -EINVAL;
2309         }
2310
2311   out:
2312         mutex_unlock(&__ip_vs_mutex);
2313         return ret;
2314 }
2315
2316
2317 static struct nf_sockopt_ops ip_vs_sockopts = {
2318         .pf             = PF_INET,
2319         .set_optmin     = IP_VS_BASE_CTL,
2320         .set_optmax     = IP_VS_SO_SET_MAX+1,
2321         .set            = do_ip_vs_set_ctl,
2322         .get_optmin     = IP_VS_BASE_CTL,
2323         .get_optmax     = IP_VS_SO_GET_MAX+1,
2324         .get            = do_ip_vs_get_ctl,
2325         .owner          = THIS_MODULE,
2326 };
2327
2328 /*
2329  * Generic Netlink interface
2330  */
2331
2332 /* IPVS genetlink family */
2333 static struct genl_family ip_vs_genl_family = {
2334         .id             = GENL_ID_GENERATE,
2335         .hdrsize        = 0,
2336         .name           = IPVS_GENL_NAME,
2337         .version        = IPVS_GENL_VERSION,
2338         .maxattr        = IPVS_CMD_MAX,
2339 };
2340
2341 /* Policy used for first-level command attributes */
2342 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2343         [IPVS_CMD_ATTR_SERVICE]         = { .type = NLA_NESTED },
2344         [IPVS_CMD_ATTR_DEST]            = { .type = NLA_NESTED },
2345         [IPVS_CMD_ATTR_DAEMON]          = { .type = NLA_NESTED },
2346         [IPVS_CMD_ATTR_TIMEOUT_TCP]     = { .type = NLA_U32 },
2347         [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2348         [IPVS_CMD_ATTR_TIMEOUT_UDP]     = { .type = NLA_U32 },
2349 };
2350
2351 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2352 static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2353         [IPVS_DAEMON_ATTR_STATE]        = { .type = NLA_U32 },
2354         [IPVS_DAEMON_ATTR_MCAST_IFN]    = { .type = NLA_NUL_STRING,
2355                                             .len = IP_VS_IFNAME_MAXLEN },
2356         [IPVS_DAEMON_ATTR_SYNC_ID]      = { .type = NLA_U32 },
2357 };
2358
2359 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2360 static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2361         [IPVS_SVC_ATTR_AF]              = { .type = NLA_U16 },
2362         [IPVS_SVC_ATTR_PROTOCOL]        = { .type = NLA_U16 },
2363         [IPVS_SVC_ATTR_ADDR]            = { .type = NLA_BINARY,
2364                                             .len = sizeof(union nf_inet_addr) },
2365         [IPVS_SVC_ATTR_PORT]            = { .type = NLA_U16 },
2366         [IPVS_SVC_ATTR_FWMARK]          = { .type = NLA_U32 },
2367         [IPVS_SVC_ATTR_SCHED_NAME]      = { .type = NLA_NUL_STRING,
2368                                             .len = IP_VS_SCHEDNAME_MAXLEN },
2369         [IPVS_SVC_ATTR_FLAGS]           = { .type = NLA_BINARY,
2370                                             .len = sizeof(struct ip_vs_flags) },
2371         [IPVS_SVC_ATTR_TIMEOUT]         = { .type = NLA_U32 },
2372         [IPVS_SVC_ATTR_NETMASK]         = { .type = NLA_U32 },
2373         [IPVS_SVC_ATTR_STATS]           = { .type = NLA_NESTED },
2374 };
2375
2376 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2377 static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2378         [IPVS_DEST_ATTR_ADDR]           = { .type = NLA_BINARY,
2379                                             .len = sizeof(union nf_inet_addr) },
2380         [IPVS_DEST_ATTR_PORT]           = { .type = NLA_U16 },
2381         [IPVS_DEST_ATTR_FWD_METHOD]     = { .type = NLA_U32 },
2382         [IPVS_DEST_ATTR_WEIGHT]         = { .type = NLA_U32 },
2383         [IPVS_DEST_ATTR_U_THRESH]       = { .type = NLA_U32 },
2384         [IPVS_DEST_ATTR_L_THRESH]       = { .type = NLA_U32 },
2385         [IPVS_DEST_ATTR_ACTIVE_CONNS]   = { .type = NLA_U32 },
2386         [IPVS_DEST_ATTR_INACT_CONNS]    = { .type = NLA_U32 },
2387         [IPVS_DEST_ATTR_PERSIST_CONNS]  = { .type = NLA_U32 },
2388         [IPVS_DEST_ATTR_STATS]          = { .type = NLA_NESTED },
2389 };
2390
2391 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2392                                  struct ip_vs_stats *stats)
2393 {
2394         struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2395         if (!nl_stats)
2396                 return -EMSGSIZE;
2397
2398         spin_lock_bh(&stats->lock);
2399
2400         NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->conns);
2401         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->inpkts);
2402         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->outpkts);
2403         NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->inbytes);
2404         NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->outbytes);
2405         NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->cps);
2406         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->inpps);
2407         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->outpps);
2408         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->inbps);
2409         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->outbps);
2410
2411         spin_unlock_bh(&stats->lock);
2412
2413         nla_nest_end(skb, nl_stats);
2414
2415         return 0;
2416
2417 nla_put_failure:
2418         spin_unlock_bh(&stats->lock);
2419         nla_nest_cancel(skb, nl_stats);
2420         return -EMSGSIZE;
2421 }
2422
2423 static int ip_vs_genl_fill_service(struct sk_buff *skb,
2424                                    struct ip_vs_service *svc)
2425 {
2426         struct nlattr *nl_service;
2427         struct ip_vs_flags flags = { .flags = svc->flags,
2428                                      .mask = ~0 };
2429
2430         nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2431         if (!nl_service)
2432                 return -EMSGSIZE;
2433
2434         NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, AF_INET);
2435
2436         if (svc->fwmark) {
2437                 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2438         } else {
2439                 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2440                 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2441                 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2442         }
2443
2444         NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2445         NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2446         NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2447         NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2448
2449         if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2450                 goto nla_put_failure;
2451
2452         nla_nest_end(skb, nl_service);
2453
2454         return 0;
2455
2456 nla_put_failure:
2457         nla_nest_cancel(skb, nl_service);
2458         return -EMSGSIZE;
2459 }
2460
2461 static int ip_vs_genl_dump_service(struct sk_buff *skb,
2462                                    struct ip_vs_service *svc,
2463                                    struct netlink_callback *cb)
2464 {
2465         void *hdr;
2466
2467         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2468                           &ip_vs_genl_family, NLM_F_MULTI,
2469                           IPVS_CMD_NEW_SERVICE);
2470         if (!hdr)
2471                 return -EMSGSIZE;
2472
2473         if (ip_vs_genl_fill_service(skb, svc) < 0)
2474                 goto nla_put_failure;
2475
2476         return genlmsg_end(skb, hdr);
2477
2478 nla_put_failure:
2479         genlmsg_cancel(skb, hdr);
2480         return -EMSGSIZE;
2481 }
2482
2483 static int ip_vs_genl_dump_services(struct sk_buff *skb,
2484                                     struct netlink_callback *cb)
2485 {
2486         int idx = 0, i;
2487         int start = cb->args[0];
2488         struct ip_vs_service *svc;
2489
2490         mutex_lock(&__ip_vs_mutex);
2491         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2492                 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2493                         if (++idx <= start)
2494                                 continue;
2495                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2496                                 idx--;
2497                                 goto nla_put_failure;
2498                         }
2499                 }
2500         }
2501
2502         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2503                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2504                         if (++idx <= start)
2505                                 continue;
2506                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2507                                 idx--;
2508                                 goto nla_put_failure;
2509                         }
2510                 }
2511         }
2512
2513 nla_put_failure:
2514         mutex_unlock(&__ip_vs_mutex);
2515         cb->args[0] = idx;
2516
2517         return skb->len;
2518 }
2519
2520 static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc,
2521                                     struct nlattr *nla, int full_entry)
2522 {
2523         struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2524         struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2525
2526         /* Parse mandatory identifying service fields first */
2527         if (nla == NULL ||
2528             nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2529                 return -EINVAL;
2530
2531         nla_af          = attrs[IPVS_SVC_ATTR_AF];
2532         nla_protocol    = attrs[IPVS_SVC_ATTR_PROTOCOL];
2533         nla_addr        = attrs[IPVS_SVC_ATTR_ADDR];
2534         nla_port        = attrs[IPVS_SVC_ATTR_PORT];
2535         nla_fwmark      = attrs[IPVS_SVC_ATTR_FWMARK];
2536
2537         if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2538                 return -EINVAL;
2539
2540         /* For now, only support IPv4 */
2541         if (nla_get_u16(nla_af) != AF_INET)
2542                 return -EAFNOSUPPORT;
2543
2544         if (nla_fwmark) {
2545                 usvc->protocol = IPPROTO_TCP;
2546                 usvc->fwmark = nla_get_u32(nla_fwmark);
2547         } else {
2548                 usvc->protocol = nla_get_u16(nla_protocol);
2549                 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2550                 usvc->port = nla_get_u16(nla_port);
2551                 usvc->fwmark = 0;
2552         }
2553
2554         /* If a full entry was requested, check for the additional fields */
2555         if (full_entry) {
2556                 struct nlattr *nla_sched, *nla_flags, *nla_timeout,
2557                               *nla_netmask;
2558                 struct ip_vs_flags flags;
2559                 struct ip_vs_service *svc;
2560
2561                 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2562                 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2563                 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2564                 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2565
2566                 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2567                         return -EINVAL;
2568
2569                 nla_memcpy(&flags, nla_flags, sizeof(flags));
2570
2571                 /* prefill flags from service if it already exists */
2572                 if (usvc->fwmark)
2573                         svc = __ip_vs_svc_fwm_get(usvc->fwmark);
2574                 else
2575                         svc = __ip_vs_service_get(usvc->protocol, usvc->addr,
2576                                                   usvc->port);
2577                 if (svc) {
2578                         usvc->flags = svc->flags;
2579                         ip_vs_service_put(svc);
2580                 } else
2581                         usvc->flags = 0;
2582
2583                 /* set new flags from userland */
2584                 usvc->flags = (usvc->flags & ~flags.mask) |
2585                               (flags.flags & flags.mask);
2586
2587                 strlcpy(usvc->sched_name, nla_data(nla_sched),
2588                         sizeof(usvc->sched_name));
2589                 usvc->timeout = nla_get_u32(nla_timeout);
2590                 usvc->netmask = nla_get_u32(nla_netmask);
2591         }
2592
2593         return 0;
2594 }
2595
2596 static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2597 {
2598         struct ip_vs_service_user usvc;
2599         int ret;
2600
2601         ret = ip_vs_genl_parse_service(&usvc, nla, 0);
2602         if (ret)
2603                 return ERR_PTR(ret);
2604
2605         if (usvc.fwmark)
2606                 return __ip_vs_svc_fwm_get(usvc.fwmark);
2607         else
2608                 return __ip_vs_service_get(usvc.protocol, usvc.addr,
2609                                            usvc.port);
2610 }
2611
2612 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2613 {
2614         struct nlattr *nl_dest;
2615
2616         nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2617         if (!nl_dest)
2618                 return -EMSGSIZE;
2619
2620         NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2621         NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2622
2623         NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2624                     atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2625         NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2626         NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2627         NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2628         NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2629                     atomic_read(&dest->activeconns));
2630         NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2631                     atomic_read(&dest->inactconns));
2632         NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2633                     atomic_read(&dest->persistconns));
2634
2635         if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2636                 goto nla_put_failure;
2637
2638         nla_nest_end(skb, nl_dest);
2639
2640         return 0;
2641
2642 nla_put_failure:
2643         nla_nest_cancel(skb, nl_dest);
2644         return -EMSGSIZE;
2645 }
2646
2647 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2648                                 struct netlink_callback *cb)
2649 {
2650         void *hdr;
2651
2652         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2653                           &ip_vs_genl_family, NLM_F_MULTI,
2654                           IPVS_CMD_NEW_DEST);
2655         if (!hdr)
2656                 return -EMSGSIZE;
2657
2658         if (ip_vs_genl_fill_dest(skb, dest) < 0)
2659                 goto nla_put_failure;
2660
2661         return genlmsg_end(skb, hdr);
2662
2663 nla_put_failure:
2664         genlmsg_cancel(skb, hdr);
2665         return -EMSGSIZE;
2666 }
2667
2668 static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2669                                  struct netlink_callback *cb)
2670 {
2671         int idx = 0;
2672         int start = cb->args[0];
2673         struct ip_vs_service *svc;
2674         struct ip_vs_dest *dest;
2675         struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2676
2677         mutex_lock(&__ip_vs_mutex);
2678
2679         /* Try to find the service for which to dump destinations */
2680         if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2681                         IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2682                 goto out_err;
2683
2684         svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
2685         if (IS_ERR(svc) || svc == NULL)
2686                 goto out_err;
2687
2688         /* Dump the destinations */
2689         list_for_each_entry(dest, &svc->destinations, n_list) {
2690                 if (++idx <= start)
2691                         continue;
2692                 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2693                         idx--;
2694                         goto nla_put_failure;
2695                 }
2696         }
2697
2698 nla_put_failure:
2699         cb->args[0] = idx;
2700         ip_vs_service_put(svc);
2701
2702 out_err:
2703         mutex_unlock(&__ip_vs_mutex);
2704
2705         return skb->len;
2706 }
2707
2708 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user *udest,
2709                                  struct nlattr *nla, int full_entry)
2710 {
2711         struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2712         struct nlattr *nla_addr, *nla_port;
2713
2714         /* Parse mandatory identifying destination fields first */
2715         if (nla == NULL ||
2716             nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2717                 return -EINVAL;
2718
2719         nla_addr        = attrs[IPVS_DEST_ATTR_ADDR];
2720         nla_port        = attrs[IPVS_DEST_ATTR_PORT];
2721
2722         if (!(nla_addr && nla_port))
2723                 return -EINVAL;
2724
2725         nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2726         udest->port = nla_get_u16(nla_port);
2727
2728         /* If a full entry was requested, check for the additional fields */
2729         if (full_entry) {
2730                 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2731                               *nla_l_thresh;
2732
2733                 nla_fwd         = attrs[IPVS_DEST_ATTR_FWD_METHOD];
2734                 nla_weight      = attrs[IPVS_DEST_ATTR_WEIGHT];
2735                 nla_u_thresh    = attrs[IPVS_DEST_ATTR_U_THRESH];
2736                 nla_l_thresh    = attrs[IPVS_DEST_ATTR_L_THRESH];
2737
2738                 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
2739                         return -EINVAL;
2740
2741                 udest->conn_flags = nla_get_u32(nla_fwd)
2742                                     & IP_VS_CONN_F_FWD_MASK;
2743                 udest->weight = nla_get_u32(nla_weight);
2744                 udest->u_threshold = nla_get_u32(nla_u_thresh);
2745                 udest->l_threshold = nla_get_u32(nla_l_thresh);
2746         }
2747
2748         return 0;
2749 }
2750
2751 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
2752                                   const char *mcast_ifn, __be32 syncid)
2753 {
2754         struct nlattr *nl_daemon;
2755
2756         nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
2757         if (!nl_daemon)
2758                 return -EMSGSIZE;
2759
2760         NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
2761         NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
2762         NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
2763
2764         nla_nest_end(skb, nl_daemon);
2765
2766         return 0;
2767
2768 nla_put_failure:
2769         nla_nest_cancel(skb, nl_daemon);
2770         return -EMSGSIZE;
2771 }
2772
2773 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
2774                                   const char *mcast_ifn, __be32 syncid,
2775                                   struct netlink_callback *cb)
2776 {
2777         void *hdr;
2778         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2779                           &ip_vs_genl_family, NLM_F_MULTI,
2780                           IPVS_CMD_NEW_DAEMON);
2781         if (!hdr)
2782                 return -EMSGSIZE;
2783
2784         if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
2785                 goto nla_put_failure;
2786
2787         return genlmsg_end(skb, hdr);
2788
2789 nla_put_failure:
2790         genlmsg_cancel(skb, hdr);
2791         return -EMSGSIZE;
2792 }
2793
2794 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
2795                                    struct netlink_callback *cb)
2796 {
2797         mutex_lock(&__ip_vs_mutex);
2798         if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
2799                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
2800                                            ip_vs_master_mcast_ifn,
2801                                            ip_vs_master_syncid, cb) < 0)
2802                         goto nla_put_failure;
2803
2804                 cb->args[0] = 1;
2805         }
2806
2807         if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
2808                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
2809                                            ip_vs_backup_mcast_ifn,
2810                                            ip_vs_backup_syncid, cb) < 0)
2811                         goto nla_put_failure;
2812
2813                 cb->args[1] = 1;
2814         }
2815
2816 nla_put_failure:
2817         mutex_unlock(&__ip_vs_mutex);
2818
2819         return skb->len;
2820 }
2821
2822 static int ip_vs_genl_new_daemon(struct nlattr **attrs)
2823 {
2824         if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
2825               attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
2826               attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
2827                 return -EINVAL;
2828
2829         return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
2830                                  nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
2831                                  nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
2832 }
2833
2834 static int ip_vs_genl_del_daemon(struct nlattr **attrs)
2835 {
2836         if (!attrs[IPVS_DAEMON_ATTR_STATE])
2837                 return -EINVAL;
2838
2839         return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
2840 }
2841
2842 static int ip_vs_genl_set_config(struct nlattr **attrs)
2843 {
2844         struct ip_vs_timeout_user t;
2845
2846         __ip_vs_get_timeouts(&t);
2847
2848         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
2849                 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
2850
2851         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
2852                 t.tcp_fin_timeout =
2853                         nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
2854
2855         if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
2856                 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
2857
2858         return ip_vs_set_timeout(&t);
2859 }
2860
2861 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
2862 {
2863         struct ip_vs_service *svc = NULL;
2864         struct ip_vs_service_user usvc;
2865         struct ip_vs_dest_user udest;
2866         int ret = 0, cmd;
2867         int need_full_svc = 0, need_full_dest = 0;
2868
2869         cmd = info->genlhdr->cmd;
2870
2871         mutex_lock(&__ip_vs_mutex);
2872
2873         if (cmd == IPVS_CMD_FLUSH) {
2874                 ret = ip_vs_flush();
2875                 goto out;
2876         } else if (cmd == IPVS_CMD_SET_CONFIG) {
2877                 ret = ip_vs_genl_set_config(info->attrs);
2878                 goto out;
2879         } else if (cmd == IPVS_CMD_NEW_DAEMON ||
2880                    cmd == IPVS_CMD_DEL_DAEMON) {
2881
2882                 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
2883
2884                 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
2885                     nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
2886                                      info->attrs[IPVS_CMD_ATTR_DAEMON],
2887                                      ip_vs_daemon_policy)) {
2888                         ret = -EINVAL;
2889                         goto out;
2890                 }
2891
2892                 if (cmd == IPVS_CMD_NEW_DAEMON)
2893                         ret = ip_vs_genl_new_daemon(daemon_attrs);
2894                 else
2895                         ret = ip_vs_genl_del_daemon(daemon_attrs);
2896                 goto out;
2897         } else if (cmd == IPVS_CMD_ZERO &&
2898                    !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
2899                 ret = ip_vs_zero_all();
2900                 goto out;
2901         }
2902
2903         /* All following commands require a service argument, so check if we
2904          * received a valid one. We need a full service specification when
2905          * adding / editing a service. Only identifying members otherwise. */
2906         if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
2907                 need_full_svc = 1;
2908
2909         ret = ip_vs_genl_parse_service(&usvc,
2910                                        info->attrs[IPVS_CMD_ATTR_SERVICE],
2911                                        need_full_svc);
2912         if (ret)
2913                 goto out;
2914
2915         /* Lookup the exact service by <protocol, addr, port> or fwmark */
2916         if (usvc.fwmark == 0)
2917                 svc = __ip_vs_service_get(usvc.protocol, usvc.addr, usvc.port);
2918         else
2919                 svc = __ip_vs_svc_fwm_get(usvc.fwmark);
2920
2921         /* Unless we're adding a new service, the service must already exist */
2922         if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
2923                 ret = -ESRCH;
2924                 goto out;
2925         }
2926
2927         /* Destination commands require a valid destination argument. For
2928          * adding / editing a destination, we need a full destination
2929          * specification. */
2930         if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
2931             cmd == IPVS_CMD_DEL_DEST) {
2932                 if (cmd != IPVS_CMD_DEL_DEST)
2933                         need_full_dest = 1;
2934
2935                 ret = ip_vs_genl_parse_dest(&udest,
2936                                             info->attrs[IPVS_CMD_ATTR_DEST],
2937                                             need_full_dest);
2938                 if (ret)
2939                         goto out;
2940         }
2941
2942         switch (cmd) {
2943         case IPVS_CMD_NEW_SERVICE:
2944                 if (svc == NULL)
2945                         ret = ip_vs_add_service(&usvc, &svc);
2946                 else
2947                         ret = -EEXIST;
2948                 break;
2949         case IPVS_CMD_SET_SERVICE:
2950                 ret = ip_vs_edit_service(svc, &usvc);
2951                 break;
2952         case IPVS_CMD_DEL_SERVICE:
2953                 ret = ip_vs_del_service(svc);
2954                 break;
2955         case IPVS_CMD_NEW_DEST:
2956                 ret = ip_vs_add_dest(svc, &udest);
2957                 break;
2958         case IPVS_CMD_SET_DEST:
2959                 ret = ip_vs_edit_dest(svc, &udest);
2960                 break;
2961         case IPVS_CMD_DEL_DEST:
2962                 ret = ip_vs_del_dest(svc, &udest);
2963                 break;
2964         case IPVS_CMD_ZERO:
2965                 ret = ip_vs_zero_service(svc);
2966                 break;
2967         default:
2968                 ret = -EINVAL;
2969         }
2970
2971 out:
2972         if (svc)
2973                 ip_vs_service_put(svc);
2974         mutex_unlock(&__ip_vs_mutex);
2975
2976         return ret;
2977 }
2978
2979 static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
2980 {
2981         struct sk_buff *msg;
2982         void *reply;
2983         int ret, cmd, reply_cmd;
2984
2985         cmd = info->genlhdr->cmd;
2986
2987         if (cmd == IPVS_CMD_GET_SERVICE)
2988                 reply_cmd = IPVS_CMD_NEW_SERVICE;
2989         else if (cmd == IPVS_CMD_GET_INFO)
2990                 reply_cmd = IPVS_CMD_SET_INFO;
2991         else if (cmd == IPVS_CMD_GET_CONFIG)
2992                 reply_cmd = IPVS_CMD_SET_CONFIG;
2993         else {
2994                 IP_VS_ERR("unknown Generic Netlink command\n");
2995                 return -EINVAL;
2996         }
2997
2998         msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2999         if (!msg)
3000                 return -ENOMEM;
3001
3002         mutex_lock(&__ip_vs_mutex);
3003
3004         reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3005         if (reply == NULL)
3006                 goto nla_put_failure;
3007
3008         switch (cmd) {
3009         case IPVS_CMD_GET_SERVICE:
3010         {
3011                 struct ip_vs_service *svc;
3012
3013                 svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
3014                 if (IS_ERR(svc)) {
3015                         ret = PTR_ERR(svc);
3016                         goto out_err;
3017                 } else if (svc) {
3018                         ret = ip_vs_genl_fill_service(msg, svc);
3019                         ip_vs_service_put(svc);
3020                         if (ret)
3021                                 goto nla_put_failure;
3022                 } else {
3023                         ret = -ESRCH;
3024                         goto out_err;
3025                 }
3026
3027                 break;
3028         }
3029
3030         case IPVS_CMD_GET_CONFIG:
3031         {
3032                 struct ip_vs_timeout_user t;
3033
3034                 __ip_vs_get_timeouts(&t);
3035 #ifdef CONFIG_IP_VS_PROTO_TCP
3036                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3037                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3038                             t.tcp_fin_timeout);
3039 #endif
3040 #ifdef CONFIG_IP_VS_PROTO_UDP
3041                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3042 #endif
3043
3044                 break;
3045         }
3046
3047         case IPVS_CMD_GET_INFO:
3048                 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3049                 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3050                             IP_VS_CONN_TAB_SIZE);
3051                 break;
3052         }
3053
3054         genlmsg_end(msg, reply);
3055         ret = genlmsg_unicast(msg, info->snd_pid);
3056         goto out;
3057
3058 nla_put_failure:
3059         IP_VS_ERR("not enough space in Netlink message\n");
3060         ret = -EMSGSIZE;
3061
3062 out_err:
3063         nlmsg_free(msg);
3064 out:
3065         mutex_unlock(&__ip_vs_mutex);
3066
3067         return ret;
3068 }
3069
3070
3071 static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3072         {
3073                 .cmd    = IPVS_CMD_NEW_SERVICE,
3074                 .flags  = GENL_ADMIN_PERM,
3075                 .policy = ip_vs_cmd_policy,
3076                 .doit   = ip_vs_genl_set_cmd,
3077         },
3078         {
3079                 .cmd    = IPVS_CMD_SET_SERVICE,
3080                 .flags  = GENL_ADMIN_PERM,
3081                 .policy = ip_vs_cmd_policy,
3082                 .doit   = ip_vs_genl_set_cmd,
3083         },
3084         {
3085                 .cmd    = IPVS_CMD_DEL_SERVICE,
3086                 .flags  = GENL_ADMIN_PERM,
3087                 .policy = ip_vs_cmd_policy,
3088                 .doit   = ip_vs_genl_set_cmd,
3089         },
3090         {
3091                 .cmd    = IPVS_CMD_GET_SERVICE,
3092                 .flags  = GENL_ADMIN_PERM,
3093                 .doit   = ip_vs_genl_get_cmd,
3094                 .dumpit = ip_vs_genl_dump_services,
3095                 .policy = ip_vs_cmd_policy,
3096         },
3097         {
3098                 .cmd    = IPVS_CMD_NEW_DEST,
3099                 .flags  = GENL_ADMIN_PERM,
3100                 .policy = ip_vs_cmd_policy,
3101                 .doit   = ip_vs_genl_set_cmd,
3102         },
3103         {
3104                 .cmd    = IPVS_CMD_SET_DEST,
3105                 .flags  = GENL_ADMIN_PERM,
3106                 .policy = ip_vs_cmd_policy,
3107                 .doit   = ip_vs_genl_set_cmd,
3108         },
3109         {
3110                 .cmd    = IPVS_CMD_DEL_DEST,
3111                 .flags  = GENL_ADMIN_PERM,
3112                 .policy = ip_vs_cmd_policy,
3113                 .doit   = ip_vs_genl_set_cmd,
3114         },
3115         {
3116                 .cmd    = IPVS_CMD_GET_DEST,
3117                 .flags  = GENL_ADMIN_PERM,
3118                 .policy = ip_vs_cmd_policy,
3119                 .dumpit = ip_vs_genl_dump_dests,
3120         },
3121         {
3122                 .cmd    = IPVS_CMD_NEW_DAEMON,
3123                 .flags  = GENL_ADMIN_PERM,
3124                 .policy = ip_vs_cmd_policy,
3125                 .doit   = ip_vs_genl_set_cmd,
3126         },
3127         {
3128                 .cmd    = IPVS_CMD_DEL_DAEMON,
3129                 .flags  = GENL_ADMIN_PERM,
3130                 .policy = ip_vs_cmd_policy,
3131                 .doit   = ip_vs_genl_set_cmd,
3132         },
3133         {
3134                 .cmd    = IPVS_CMD_GET_DAEMON,
3135                 .flags  = GENL_ADMIN_PERM,
3136                 .dumpit = ip_vs_genl_dump_daemons,
3137         },
3138         {
3139                 .cmd    = IPVS_CMD_SET_CONFIG,
3140                 .flags  = GENL_ADMIN_PERM,
3141                 .policy = ip_vs_cmd_policy,
3142                 .doit   = ip_vs_genl_set_cmd,
3143         },
3144         {
3145                 .cmd    = IPVS_CMD_GET_CONFIG,
3146                 .flags  = GENL_ADMIN_PERM,
3147                 .doit   = ip_vs_genl_get_cmd,
3148         },
3149         {
3150                 .cmd    = IPVS_CMD_GET_INFO,
3151                 .flags  = GENL_ADMIN_PERM,
3152                 .doit   = ip_vs_genl_get_cmd,
3153         },
3154         {
3155                 .cmd    = IPVS_CMD_ZERO,
3156                 .flags  = GENL_ADMIN_PERM,
3157                 .policy = ip_vs_cmd_policy,
3158                 .doit   = ip_vs_genl_set_cmd,
3159         },
3160         {
3161                 .cmd    = IPVS_CMD_FLUSH,
3162                 .flags  = GENL_ADMIN_PERM,
3163                 .doit   = ip_vs_genl_set_cmd,
3164         },
3165 };
3166
3167 static int __init ip_vs_genl_register(void)
3168 {
3169         int ret, i;
3170
3171         ret = genl_register_family(&ip_vs_genl_family);
3172         if (ret)
3173                 return ret;
3174
3175         for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
3176                 ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
3177                 if (ret)
3178                         goto err_out;
3179         }
3180         return 0;
3181
3182 err_out:
3183         genl_unregister_family(&ip_vs_genl_family);
3184         return ret;
3185 }
3186
3187 static void ip_vs_genl_unregister(void)
3188 {
3189         genl_unregister_family(&ip_vs_genl_family);
3190 }
3191
3192 /* End of Generic Netlink interface definitions */
3193
3194
3195 int __init ip_vs_control_init(void)
3196 {
3197         int ret;
3198         int idx;
3199
3200         EnterFunction(2);
3201
3202         ret = nf_register_sockopt(&ip_vs_sockopts);
3203         if (ret) {
3204                 IP_VS_ERR("cannot register sockopt.\n");
3205                 return ret;
3206         }
3207
3208         ret = ip_vs_genl_register();
3209         if (ret) {
3210                 IP_VS_ERR("cannot register Generic Netlink interface.\n");
3211                 nf_unregister_sockopt(&ip_vs_sockopts);
3212                 return ret;
3213         }
3214
3215         proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
3216         proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
3217
3218         sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
3219
3220         /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3221         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
3222                 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3223                 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3224         }
3225         for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++)  {
3226                 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3227         }
3228
3229         ip_vs_new_estimator(&ip_vs_stats);
3230
3231         /* Hook the defense timer */
3232         schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3233
3234         LeaveFunction(2);
3235         return 0;
3236 }
3237
3238
3239 void ip_vs_control_cleanup(void)
3240 {
3241         EnterFunction(2);
3242         ip_vs_trash_cleanup();
3243         cancel_rearming_delayed_work(&defense_work);
3244         cancel_work_sync(&defense_work.work);
3245         ip_vs_kill_estimator(&ip_vs_stats);
3246         unregister_sysctl_table(sysctl_header);
3247         proc_net_remove(&init_net, "ip_vs_stats");
3248         proc_net_remove(&init_net, "ip_vs");
3249         ip_vs_genl_unregister();
3250         nf_unregister_sockopt(&ip_vs_sockopts);
3251         LeaveFunction(2);
3252 }