]> bbs.cooldavid.org Git - net-next-2.6.git/blob - net/netfilter/ipvs/ip_vs_ctl.c
ipvs: move ip_route_me_harder for ICMP
[net-next-2.6.git] / net / netfilter / ipvs / ip_vs_ctl.c
1 /*
2  * IPVS         An implementation of the IP virtual server support for the
3  *              LINUX operating system.  IPVS is now implemented as a module
4  *              over the NetFilter framework. IPVS can be used to build a
5  *              high-performance and highly available server based on a
6  *              cluster of servers.
7  *
8  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
9  *              Peter Kese <peter.kese@ijs.si>
10  *              Julian Anastasov <ja@ssi.bg>
11  *
12  *              This program is free software; you can redistribute it and/or
13  *              modify it under the terms of the GNU General Public License
14  *              as published by the Free Software Foundation; either version
15  *              2 of the License, or (at your option) any later version.
16  *
17  * Changes:
18  *
19  */
20
21 #define KMSG_COMPONENT "IPVS"
22 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
24 #include <linux/module.h>
25 #include <linux/init.h>
26 #include <linux/types.h>
27 #include <linux/capability.h>
28 #include <linux/fs.h>
29 #include <linux/sysctl.h>
30 #include <linux/proc_fs.h>
31 #include <linux/workqueue.h>
32 #include <linux/swap.h>
33 #include <linux/seq_file.h>
34 #include <linux/slab.h>
35
36 #include <linux/netfilter.h>
37 #include <linux/netfilter_ipv4.h>
38 #include <linux/mutex.h>
39
40 #include <net/net_namespace.h>
41 #include <net/ip.h>
42 #ifdef CONFIG_IP_VS_IPV6
43 #include <net/ipv6.h>
44 #include <net/ip6_route.h>
45 #endif
46 #include <net/route.h>
47 #include <net/sock.h>
48 #include <net/genetlink.h>
49
50 #include <asm/uaccess.h>
51
52 #include <net/ip_vs.h>
53
54 /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
55 static DEFINE_MUTEX(__ip_vs_mutex);
56
57 /* lock for service table */
58 static DEFINE_RWLOCK(__ip_vs_svc_lock);
59
60 /* lock for table with the real services */
61 static DEFINE_RWLOCK(__ip_vs_rs_lock);
62
63 /* lock for state and timeout tables */
64 static DEFINE_SPINLOCK(ip_vs_securetcp_lock);
65
66 /* lock for drop entry handling */
67 static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
68
69 /* lock for drop packet handling */
70 static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
71
72 /* 1/rate drop and drop-entry variables */
73 int ip_vs_drop_rate = 0;
74 int ip_vs_drop_counter = 0;
75 static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
76
77 /* number of virtual services */
78 static int ip_vs_num_services = 0;
79
80 /* sysctl variables */
81 static int sysctl_ip_vs_drop_entry = 0;
82 static int sysctl_ip_vs_drop_packet = 0;
83 static int sysctl_ip_vs_secure_tcp = 0;
84 static int sysctl_ip_vs_amemthresh = 1024;
85 static int sysctl_ip_vs_am_droprate = 10;
86 int sysctl_ip_vs_cache_bypass = 0;
87 int sysctl_ip_vs_expire_nodest_conn = 0;
88 int sysctl_ip_vs_expire_quiescent_template = 0;
89 int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
90 int sysctl_ip_vs_nat_icmp_send = 0;
91 #ifdef CONFIG_IP_VS_NFCT
92 int sysctl_ip_vs_conntrack;
93 #endif
94 int sysctl_ip_vs_snat_reroute = 1;
95
96
#ifdef CONFIG_IP_VS_DEBUG
/* Debug level; only present when CONFIG_IP_VS_DEBUG is set. */
static int sysctl_ip_vs_debug_level;

/* Return the current value of sysctl_ip_vs_debug_level. */
int ip_vs_get_debug_level(void)
{
        const int level = sysctl_ip_vs_debug_level;

        return level;
}
#endif
105
#ifdef CONFIG_IP_VS_IPV6
/*
 *      Check whether an IPv6 address is local to this host.
 *
 *      Performs a route lookup for @addr in init_net (unspecified source,
 *      no output interface) and reports the address as local when the
 *      lookup succeeded and the route's device is a loopback device.
 *      The approach is taken from rt6_fill_node() in net/ipv6/route.c.
 *
 *      Returns 1 if the address is local, 0 otherwise.
 */
static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
{
        struct dst_entry *dst;
        struct rt6_info *rt;
        int is_local;
        struct flowi fl = {
                .oif = 0,
                .nl_u = {
                        .ip6_u = {
                                .daddr = *addr,
                                .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
        };

        dst = ip6_route_output(&init_net, NULL, &fl);
        rt = (struct rt6_info *)dst;

        /*
         * A failed lookup is reported through dst->error rather than a NULL
         * return, so an entry carrying an error must not count as local.
         */
        is_local = rt && !dst->error && rt->rt6i_dev &&
                   (rt->rt6i_dev->flags & IFF_LOOPBACK);

        /* The lookup returned a referenced entry; drop it to avoid a leak. */
        dst_release(dst);

        return is_local;
}
#endif
126 /*
127  *      update_defense_level is called from keventd and from sysctl,
128  *      so it needs to protect itself from softirqs
129  */
130 static void update_defense_level(void)
131 {
132         struct sysinfo i;
133         static int old_secure_tcp = 0;
134         int availmem;
135         int nomem;
136         int to_change = -1;
137
138         /* we only count free and buffered memory (in pages) */
139         si_meminfo(&i);
140         availmem = i.freeram + i.bufferram;
141         /* however in linux 2.5 the i.bufferram is total page cache size,
142            we need adjust it */
143         /* si_swapinfo(&i); */
144         /* availmem = availmem - (i.totalswap - i.freeswap); */
145
146         nomem = (availmem < sysctl_ip_vs_amemthresh);
147
148         local_bh_disable();
149
150         /* drop_entry */
151         spin_lock(&__ip_vs_dropentry_lock);
152         switch (sysctl_ip_vs_drop_entry) {
153         case 0:
154                 atomic_set(&ip_vs_dropentry, 0);
155                 break;
156         case 1:
157                 if (nomem) {
158                         atomic_set(&ip_vs_dropentry, 1);
159                         sysctl_ip_vs_drop_entry = 2;
160                 } else {
161                         atomic_set(&ip_vs_dropentry, 0);
162                 }
163                 break;
164         case 2:
165                 if (nomem) {
166                         atomic_set(&ip_vs_dropentry, 1);
167                 } else {
168                         atomic_set(&ip_vs_dropentry, 0);
169                         sysctl_ip_vs_drop_entry = 1;
170                 };
171                 break;
172         case 3:
173                 atomic_set(&ip_vs_dropentry, 1);
174                 break;
175         }
176         spin_unlock(&__ip_vs_dropentry_lock);
177
178         /* drop_packet */
179         spin_lock(&__ip_vs_droppacket_lock);
180         switch (sysctl_ip_vs_drop_packet) {
181         case 0:
182                 ip_vs_drop_rate = 0;
183                 break;
184         case 1:
185                 if (nomem) {
186                         ip_vs_drop_rate = ip_vs_drop_counter
187                                 = sysctl_ip_vs_amemthresh /
188                                 (sysctl_ip_vs_amemthresh-availmem);
189                         sysctl_ip_vs_drop_packet = 2;
190                 } else {
191                         ip_vs_drop_rate = 0;
192                 }
193                 break;
194         case 2:
195                 if (nomem) {
196                         ip_vs_drop_rate = ip_vs_drop_counter
197                                 = sysctl_ip_vs_amemthresh /
198                                 (sysctl_ip_vs_amemthresh-availmem);
199                 } else {
200                         ip_vs_drop_rate = 0;
201                         sysctl_ip_vs_drop_packet = 1;
202                 }
203                 break;
204         case 3:
205                 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
206                 break;
207         }
208         spin_unlock(&__ip_vs_droppacket_lock);
209
210         /* secure_tcp */
211         spin_lock(&ip_vs_securetcp_lock);
212         switch (sysctl_ip_vs_secure_tcp) {
213         case 0:
214                 if (old_secure_tcp >= 2)
215                         to_change = 0;
216                 break;
217         case 1:
218                 if (nomem) {
219                         if (old_secure_tcp < 2)
220                                 to_change = 1;
221                         sysctl_ip_vs_secure_tcp = 2;
222                 } else {
223                         if (old_secure_tcp >= 2)
224                                 to_change = 0;
225                 }
226                 break;
227         case 2:
228                 if (nomem) {
229                         if (old_secure_tcp < 2)
230                                 to_change = 1;
231                 } else {
232                         if (old_secure_tcp >= 2)
233                                 to_change = 0;
234                         sysctl_ip_vs_secure_tcp = 1;
235                 }
236                 break;
237         case 3:
238                 if (old_secure_tcp < 2)
239                         to_change = 1;
240                 break;
241         }
242         old_secure_tcp = sysctl_ip_vs_secure_tcp;
243         if (to_change >= 0)
244                 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
245         spin_unlock(&ip_vs_securetcp_lock);
246
247         local_bh_enable();
248 }
249
250
251 /*
252  *      Timer for checking the defense
253  */
254 #define DEFENSE_TIMER_PERIOD    1*HZ
255 static void defense_work_handler(struct work_struct *work);
256 static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
257
258 static void defense_work_handler(struct work_struct *work)
259 {
260         update_defense_level();
261         if (atomic_read(&ip_vs_dropentry))
262                 ip_vs_random_dropentry();
263
264         schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
265 }
266
267 int
268 ip_vs_use_count_inc(void)
269 {
270         return try_module_get(THIS_MODULE);
271 }
272
273 void
274 ip_vs_use_count_dec(void)
275 {
276         module_put(THIS_MODULE);
277 }
278
279
/*
 *      Hash table: for virtual service lookups
 *      (2^IP_VS_SVC_TAB_BITS buckets; the mask selects a bucket index)
 */
#define IP_VS_SVC_TAB_BITS 8
#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)

/* the service table hashed by <protocol, addr, port> */
static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
/* the service table hashed by fwmark */
static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];

/*
 *      Hash table: for real service lookups
 *      (2^IP_VS_RTAB_BITS buckets; the mask selects a bucket index)
 */
#define IP_VS_RTAB_BITS 4
#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)

static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];

/*
 *      Trash for destinations: holds destinations that were removed from
 *      a service but are still referenced by connection entries.
 */
static LIST_HEAD(ip_vs_dest_trash);

/*
 *      FTP & NULL virtual service counters
 *      (ip_vs_service_get() only tries its FTP and port-zero fallback
 *      lookups while the matching counter is non-zero)
 */
static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
311
312
313 /*
314  *      Returns hash value for virtual service
315  */
316 static __inline__ unsigned
317 ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
318                   __be16 port)
319 {
320         register unsigned porth = ntohs(port);
321         __be32 addr_fold = addr->ip;
322
323 #ifdef CONFIG_IP_VS_IPV6
324         if (af == AF_INET6)
325                 addr_fold = addr->ip6[0]^addr->ip6[1]^
326                             addr->ip6[2]^addr->ip6[3];
327 #endif
328
329         return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
330                 & IP_VS_SVC_TAB_MASK;
331 }
332
333 /*
334  *      Returns hash value of fwmark for virtual service lookup
335  */
336 static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
337 {
338         return fwmark & IP_VS_SVC_TAB_MASK;
339 }
340
341 /*
342  *      Hashes a service in the ip_vs_svc_table by <proto,addr,port>
343  *      or in the ip_vs_svc_fwm_table by fwmark.
344  *      Should be called with locked tables.
345  */
346 static int ip_vs_svc_hash(struct ip_vs_service *svc)
347 {
348         unsigned hash;
349
350         if (svc->flags & IP_VS_SVC_F_HASHED) {
351                 pr_err("%s(): request for already hashed, called from %pF\n",
352                        __func__, __builtin_return_address(0));
353                 return 0;
354         }
355
356         if (svc->fwmark == 0) {
357                 /*
358                  *  Hash it by <protocol,addr,port> in ip_vs_svc_table
359                  */
360                 hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
361                                          svc->port);
362                 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
363         } else {
364                 /*
365                  *  Hash it by fwmark in ip_vs_svc_fwm_table
366                  */
367                 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
368                 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
369         }
370
371         svc->flags |= IP_VS_SVC_F_HASHED;
372         /* increase its refcnt because it is referenced by the svc table */
373         atomic_inc(&svc->refcnt);
374         return 1;
375 }
376
377
378 /*
379  *      Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
380  *      Should be called with locked tables.
381  */
382 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
383 {
384         if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
385                 pr_err("%s(): request for unhash flagged, called from %pF\n",
386                        __func__, __builtin_return_address(0));
387                 return 0;
388         }
389
390         if (svc->fwmark == 0) {
391                 /* Remove it from the ip_vs_svc_table table */
392                 list_del(&svc->s_list);
393         } else {
394                 /* Remove it from the ip_vs_svc_fwm_table table */
395                 list_del(&svc->f_list);
396         }
397
398         svc->flags &= ~IP_VS_SVC_F_HASHED;
399         atomic_dec(&svc->refcnt);
400         return 1;
401 }
402
403
404 /*
405  *      Get service by {proto,addr,port} in the service table.
406  */
407 static inline struct ip_vs_service *
408 __ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr,
409                     __be16 vport)
410 {
411         unsigned hash;
412         struct ip_vs_service *svc;
413
414         /* Check for "full" addressed entries */
415         hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
416
417         list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
418                 if ((svc->af == af)
419                     && ip_vs_addr_equal(af, &svc->addr, vaddr)
420                     && (svc->port == vport)
421                     && (svc->protocol == protocol)) {
422                         /* HIT */
423                         return svc;
424                 }
425         }
426
427         return NULL;
428 }
429
430
431 /*
432  *      Get service by {fwmark} in the service table.
433  */
434 static inline struct ip_vs_service *
435 __ip_vs_svc_fwm_find(int af, __u32 fwmark)
436 {
437         unsigned hash;
438         struct ip_vs_service *svc;
439
440         /* Check for fwmark addressed entries */
441         hash = ip_vs_svc_fwm_hashkey(fwmark);
442
443         list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
444                 if (svc->fwmark == fwmark && svc->af == af) {
445                         /* HIT */
446                         return svc;
447                 }
448         }
449
450         return NULL;
451 }
452
453 struct ip_vs_service *
454 ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
455                   const union nf_inet_addr *vaddr, __be16 vport)
456 {
457         struct ip_vs_service *svc;
458
459         read_lock(&__ip_vs_svc_lock);
460
461         /*
462          *      Check the table hashed by fwmark first
463          */
464         if (fwmark && (svc = __ip_vs_svc_fwm_find(af, fwmark)))
465                 goto out;
466
467         /*
468          *      Check the table hashed by <protocol,addr,port>
469          *      for "full" addressed entries
470          */
471         svc = __ip_vs_service_find(af, protocol, vaddr, vport);
472
473         if (svc == NULL
474             && protocol == IPPROTO_TCP
475             && atomic_read(&ip_vs_ftpsvc_counter)
476             && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
477                 /*
478                  * Check if ftp service entry exists, the packet
479                  * might belong to FTP data connections.
480                  */
481                 svc = __ip_vs_service_find(af, protocol, vaddr, FTPPORT);
482         }
483
484         if (svc == NULL
485             && atomic_read(&ip_vs_nullsvc_counter)) {
486                 /*
487                  * Check if the catch-all port (port zero) exists
488                  */
489                 svc = __ip_vs_service_find(af, protocol, vaddr, 0);
490         }
491
492   out:
493         if (svc)
494                 atomic_inc(&svc->usecnt);
495         read_unlock(&__ip_vs_svc_lock);
496
497         IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
498                       fwmark, ip_vs_proto_name(protocol),
499                       IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
500                       svc ? "hit" : "not hit");
501
502         return svc;
503 }
504
505
506 static inline void
507 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
508 {
509         atomic_inc(&svc->refcnt);
510         dest->svc = svc;
511 }
512
513 static void
514 __ip_vs_unbind_svc(struct ip_vs_dest *dest)
515 {
516         struct ip_vs_service *svc = dest->svc;
517
518         dest->svc = NULL;
519         if (atomic_dec_and_test(&svc->refcnt)) {
520                 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
521                               svc->fwmark,
522                               IP_VS_DBG_ADDR(svc->af, &svc->addr),
523                               ntohs(svc->port), atomic_read(&svc->usecnt));
524                 kfree(svc);
525         }
526 }
527
528
529 /*
530  *      Returns hash value for real service
531  */
532 static inline unsigned ip_vs_rs_hashkey(int af,
533                                             const union nf_inet_addr *addr,
534                                             __be16 port)
535 {
536         register unsigned porth = ntohs(port);
537         __be32 addr_fold = addr->ip;
538
539 #ifdef CONFIG_IP_VS_IPV6
540         if (af == AF_INET6)
541                 addr_fold = addr->ip6[0]^addr->ip6[1]^
542                             addr->ip6[2]^addr->ip6[3];
543 #endif
544
545         return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
546                 & IP_VS_RTAB_MASK;
547 }
548
549 /*
550  *      Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
551  *      should be called with locked tables.
552  */
553 static int ip_vs_rs_hash(struct ip_vs_dest *dest)
554 {
555         unsigned hash;
556
557         if (!list_empty(&dest->d_list)) {
558                 return 0;
559         }
560
561         /*
562          *      Hash by proto,addr,port,
563          *      which are the parameters of the real service.
564          */
565         hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
566
567         list_add(&dest->d_list, &ip_vs_rtable[hash]);
568
569         return 1;
570 }
571
572 /*
573  *      UNhashes ip_vs_dest from ip_vs_rtable.
574  *      should be called with locked tables.
575  */
576 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
577 {
578         /*
579          * Remove it from the ip_vs_rtable table.
580          */
581         if (!list_empty(&dest->d_list)) {
582                 list_del(&dest->d_list);
583                 INIT_LIST_HEAD(&dest->d_list);
584         }
585
586         return 1;
587 }
588
589 /*
590  *      Lookup real service by <proto,addr,port> in the real service table.
591  */
592 struct ip_vs_dest *
593 ip_vs_lookup_real_service(int af, __u16 protocol,
594                           const union nf_inet_addr *daddr,
595                           __be16 dport)
596 {
597         unsigned hash;
598         struct ip_vs_dest *dest;
599
600         /*
601          *      Check for "full" addressed entries
602          *      Return the first found entry
603          */
604         hash = ip_vs_rs_hashkey(af, daddr, dport);
605
606         read_lock(&__ip_vs_rs_lock);
607         list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
608                 if ((dest->af == af)
609                     && ip_vs_addr_equal(af, &dest->addr, daddr)
610                     && (dest->port == dport)
611                     && ((dest->protocol == protocol) ||
612                         dest->vfwmark)) {
613                         /* HIT */
614                         read_unlock(&__ip_vs_rs_lock);
615                         return dest;
616                 }
617         }
618         read_unlock(&__ip_vs_rs_lock);
619
620         return NULL;
621 }
622
623 /*
624  *      Lookup destination by {addr,port} in the given service
625  */
626 static struct ip_vs_dest *
627 ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
628                   __be16 dport)
629 {
630         struct ip_vs_dest *dest;
631
632         /*
633          * Find the destination for the given service
634          */
635         list_for_each_entry(dest, &svc->destinations, n_list) {
636                 if ((dest->af == svc->af)
637                     && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
638                     && (dest->port == dport)) {
639                         /* HIT */
640                         return dest;
641                 }
642         }
643
644         return NULL;
645 }
646
647 /*
648  * Find destination by {daddr,dport,vaddr,protocol}
649  * Cretaed to be used in ip_vs_process_message() in
650  * the backup synchronization daemon. It finds the
651  * destination to be bound to the received connection
652  * on the backup.
653  *
654  * ip_vs_lookup_real_service() looked promissing, but
655  * seems not working as expected.
656  */
657 struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
658                                    __be16 dport,
659                                    const union nf_inet_addr *vaddr,
660                                    __be16 vport, __u16 protocol)
661 {
662         struct ip_vs_dest *dest;
663         struct ip_vs_service *svc;
664
665         svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
666         if (!svc)
667                 return NULL;
668         dest = ip_vs_lookup_dest(svc, daddr, dport);
669         if (dest)
670                 atomic_inc(&dest->refcnt);
671         ip_vs_service_put(svc);
672         return dest;
673 }
674
675 /*
676  *  Lookup dest by {svc,addr,port} in the destination trash.
677  *  The destination trash is used to hold the destinations that are removed
678  *  from the service table but are still referenced by some conn entries.
679  *  The reason to add the destination trash is when the dest is temporary
680  *  down (either by administrator or by monitor program), the dest can be
681  *  picked back from the trash, the remaining connections to the dest can
682  *  continue, and the counting information of the dest is also useful for
683  *  scheduling.
684  */
685 static struct ip_vs_dest *
686 ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
687                      __be16 dport)
688 {
689         struct ip_vs_dest *dest, *nxt;
690
691         /*
692          * Find the destination in trash
693          */
694         list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
695                 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
696                               "dest->refcnt=%d\n",
697                               dest->vfwmark,
698                               IP_VS_DBG_ADDR(svc->af, &dest->addr),
699                               ntohs(dest->port),
700                               atomic_read(&dest->refcnt));
701                 if (dest->af == svc->af &&
702                     ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
703                     dest->port == dport &&
704                     dest->vfwmark == svc->fwmark &&
705                     dest->protocol == svc->protocol &&
706                     (svc->fwmark ||
707                      (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
708                       dest->vport == svc->port))) {
709                         /* HIT */
710                         return dest;
711                 }
712
713                 /*
714                  * Try to purge the destination from trash if not referenced
715                  */
716                 if (atomic_read(&dest->refcnt) == 1) {
717                         IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
718                                       "from trash\n",
719                                       dest->vfwmark,
720                                       IP_VS_DBG_ADDR(svc->af, &dest->addr),
721                                       ntohs(dest->port));
722                         list_del(&dest->n_list);
723                         ip_vs_dst_reset(dest);
724                         __ip_vs_unbind_svc(dest);
725                         kfree(dest);
726                 }
727         }
728
729         return NULL;
730 }
731
732
733 /*
734  *  Clean up all the destinations in the trash
735  *  Called by the ip_vs_control_cleanup()
736  *
737  *  When the ip_vs_control_clearup is activated by ipvs module exit,
738  *  the service tables must have been flushed and all the connections
739  *  are expired, and the refcnt of each destination in the trash must
740  *  be 1, so we simply release them here.
741  */
742 static void ip_vs_trash_cleanup(void)
743 {
744         struct ip_vs_dest *dest, *nxt;
745
746         list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
747                 list_del(&dest->n_list);
748                 ip_vs_dst_reset(dest);
749                 __ip_vs_unbind_svc(dest);
750                 kfree(dest);
751         }
752 }
753
754
755 static void
756 ip_vs_zero_stats(struct ip_vs_stats *stats)
757 {
758         spin_lock_bh(&stats->lock);
759
760         memset(&stats->ustats, 0, sizeof(stats->ustats));
761         ip_vs_zero_estimator(stats);
762
763         spin_unlock_bh(&stats->lock);
764 }
765
766 /*
767  *      Update a destination in the given service
768  */
769 static void
770 __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
771                     struct ip_vs_dest_user_kern *udest, int add)
772 {
773         int conn_flags;
774
775         /* set the weight and the flags */
776         atomic_set(&dest->weight, udest->weight);
777         conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
778         conn_flags |= IP_VS_CONN_F_INACTIVE;
779
780         /* check if local node and update the flags */
781 #ifdef CONFIG_IP_VS_IPV6
782         if (svc->af == AF_INET6) {
783                 if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) {
784                         conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
785                                 | IP_VS_CONN_F_LOCALNODE;
786                 }
787         } else
788 #endif
789                 if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
790                         conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
791                                 | IP_VS_CONN_F_LOCALNODE;
792                 }
793
794         /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
795         if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
796                 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
797         } else {
798                 /*
799                  *    Put the real service in ip_vs_rtable if not present.
800                  *    For now only for NAT!
801                  */
802                 write_lock_bh(&__ip_vs_rs_lock);
803                 ip_vs_rs_hash(dest);
804                 write_unlock_bh(&__ip_vs_rs_lock);
805         }
806         atomic_set(&dest->conn_flags, conn_flags);
807
808         /* bind the service */
809         if (!dest->svc) {
810                 __ip_vs_bind_svc(dest, svc);
811         } else {
812                 if (dest->svc != svc) {
813                         __ip_vs_unbind_svc(dest);
814                         ip_vs_zero_stats(&dest->stats);
815                         __ip_vs_bind_svc(dest, svc);
816                 }
817         }
818
819         /* set the dest status flags */
820         dest->flags |= IP_VS_DEST_F_AVAILABLE;
821
822         if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
823                 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
824         dest->u_threshold = udest->u_threshold;
825         dest->l_threshold = udest->l_threshold;
826
827         if (add)
828                 ip_vs_new_estimator(&dest->stats);
829
830         write_lock_bh(&__ip_vs_svc_lock);
831
832         /* Wait until all other svc users go away */
833         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
834
835         if (add) {
836                 list_add(&dest->n_list, &svc->destinations);
837                 svc->num_dests++;
838         }
839
840         /* call the update_service, because server weight may be changed */
841         if (svc->scheduler->update_service)
842                 svc->scheduler->update_service(svc);
843
844         write_unlock_bh(&__ip_vs_svc_lock);
845 }
846
847
848 /*
849  *      Create a destination for the given service
850  */
851 static int
852 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
853                struct ip_vs_dest **dest_p)
854 {
855         struct ip_vs_dest *dest;
856         unsigned atype;
857
858         EnterFunction(2);
859
860 #ifdef CONFIG_IP_VS_IPV6
861         if (svc->af == AF_INET6) {
862                 atype = ipv6_addr_type(&udest->addr.in6);
863                 if ((!(atype & IPV6_ADDR_UNICAST) ||
864                         atype & IPV6_ADDR_LINKLOCAL) &&
865                         !__ip_vs_addr_is_local_v6(&udest->addr.in6))
866                         return -EINVAL;
867         } else
868 #endif
869         {
870                 atype = inet_addr_type(&init_net, udest->addr.ip);
871                 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
872                         return -EINVAL;
873         }
874
875         dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
876         if (dest == NULL) {
877                 pr_err("%s(): no memory.\n", __func__);
878                 return -ENOMEM;
879         }
880
881         dest->af = svc->af;
882         dest->protocol = svc->protocol;
883         dest->vaddr = svc->addr;
884         dest->vport = svc->port;
885         dest->vfwmark = svc->fwmark;
886         ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
887         dest->port = udest->port;
888
889         atomic_set(&dest->activeconns, 0);
890         atomic_set(&dest->inactconns, 0);
891         atomic_set(&dest->persistconns, 0);
892         atomic_set(&dest->refcnt, 1);
893
894         INIT_LIST_HEAD(&dest->d_list);
895         spin_lock_init(&dest->dst_lock);
896         spin_lock_init(&dest->stats.lock);
897         __ip_vs_update_dest(svc, dest, udest, 1);
898
899         *dest_p = dest;
900
901         LeaveFunction(2);
902         return 0;
903 }
904
905
906 /*
907  *      Add a destination into an existing service
908  */
909 static int
910 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
911 {
912         struct ip_vs_dest *dest;
913         union nf_inet_addr daddr;
914         __be16 dport = udest->port;
915         int ret;
916
917         EnterFunction(2);
918
919         if (udest->weight < 0) {
920                 pr_err("%s(): server weight less than zero\n", __func__);
921                 return -ERANGE;
922         }
923
924         if (udest->l_threshold > udest->u_threshold) {
925                 pr_err("%s(): lower threshold is higher than upper threshold\n",
926                         __func__);
927                 return -ERANGE;
928         }
929
930         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
931
932         /*
933          * Check if the dest already exists in the list
934          */
935         dest = ip_vs_lookup_dest(svc, &daddr, dport);
936
937         if (dest != NULL) {
938                 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
939                 return -EEXIST;
940         }
941
942         /*
943          * Check if the dest already exists in the trash and
944          * is from the same service
945          */
946         dest = ip_vs_trash_get_dest(svc, &daddr, dport);
947
948         if (dest != NULL) {
949                 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
950                               "dest->refcnt=%d, service %u/%s:%u\n",
951                               IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
952                               atomic_read(&dest->refcnt),
953                               dest->vfwmark,
954                               IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
955                               ntohs(dest->vport));
956
957                 /*
958                  * Get the destination from the trash
959                  */
960                 list_del(&dest->n_list);
961
962                 __ip_vs_update_dest(svc, dest, udest, 1);
963                 ret = 0;
964         } else {
965                 /*
966                  * Allocate and initialize the dest structure
967                  */
968                 ret = ip_vs_new_dest(svc, udest, &dest);
969         }
970         LeaveFunction(2);
971
972         return ret;
973 }
974
975
976 /*
977  *      Edit a destination in the given service
978  */
979 static int
980 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
981 {
982         struct ip_vs_dest *dest;
983         union nf_inet_addr daddr;
984         __be16 dport = udest->port;
985
986         EnterFunction(2);
987
988         if (udest->weight < 0) {
989                 pr_err("%s(): server weight less than zero\n", __func__);
990                 return -ERANGE;
991         }
992
993         if (udest->l_threshold > udest->u_threshold) {
994                 pr_err("%s(): lower threshold is higher than upper threshold\n",
995                         __func__);
996                 return -ERANGE;
997         }
998
999         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
1000
1001         /*
1002          *  Lookup the destination list
1003          */
1004         dest = ip_vs_lookup_dest(svc, &daddr, dport);
1005
1006         if (dest == NULL) {
1007                 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
1008                 return -ENOENT;
1009         }
1010
1011         __ip_vs_update_dest(svc, dest, udest, 0);
1012         LeaveFunction(2);
1013
1014         return 0;
1015 }
1016
1017
1018 /*
1019  *      Delete a destination (must be already unlinked from the service)
1020  */
1021 static void __ip_vs_del_dest(struct ip_vs_dest *dest)
1022 {
1023         ip_vs_kill_estimator(&dest->stats);
1024
1025         /*
1026          *  Remove it from the d-linked list with the real services.
1027          */
1028         write_lock_bh(&__ip_vs_rs_lock);
1029         ip_vs_rs_unhash(dest);
1030         write_unlock_bh(&__ip_vs_rs_lock);
1031
1032         /*
1033          *  Decrease the refcnt of the dest, and free the dest
1034          *  if nobody refers to it (refcnt=0). Otherwise, throw
1035          *  the destination into the trash.
1036          */
1037         if (atomic_dec_and_test(&dest->refcnt)) {
1038                 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1039                               dest->vfwmark,
1040                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1041                               ntohs(dest->port));
1042                 ip_vs_dst_reset(dest);
1043                 /* simply decrease svc->refcnt here, let the caller check
1044                    and release the service if nobody refers to it.
1045                    Only user context can release destination and service,
1046                    and only one user context can update virtual service at a
1047                    time, so the operation here is OK */
1048                 atomic_dec(&dest->svc->refcnt);
1049                 kfree(dest);
1050         } else {
1051                 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1052                               "dest->refcnt=%d\n",
1053                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1054                               ntohs(dest->port),
1055                               atomic_read(&dest->refcnt));
1056                 list_add(&dest->n_list, &ip_vs_dest_trash);
1057                 atomic_inc(&dest->refcnt);
1058         }
1059 }
1060
1061
1062 /*
1063  *      Unlink a destination from the given service
1064  */
1065 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1066                                 struct ip_vs_dest *dest,
1067                                 int svcupd)
1068 {
1069         dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1070
1071         /*
1072          *  Remove it from the d-linked destination list.
1073          */
1074         list_del(&dest->n_list);
1075         svc->num_dests--;
1076
1077         /*
1078          *  Call the update_service function of its scheduler
1079          */
1080         if (svcupd && svc->scheduler->update_service)
1081                         svc->scheduler->update_service(svc);
1082 }
1083
1084
1085 /*
1086  *      Delete a destination server in the given service
1087  */
1088 static int
1089 ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1090 {
1091         struct ip_vs_dest *dest;
1092         __be16 dport = udest->port;
1093
1094         EnterFunction(2);
1095
1096         dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
1097
1098         if (dest == NULL) {
1099                 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1100                 return -ENOENT;
1101         }
1102
1103         write_lock_bh(&__ip_vs_svc_lock);
1104
1105         /*
1106          *      Wait until all other svc users go away.
1107          */
1108         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1109
1110         /*
1111          *      Unlink dest from the service
1112          */
1113         __ip_vs_unlink_dest(svc, dest, 1);
1114
1115         write_unlock_bh(&__ip_vs_svc_lock);
1116
1117         /*
1118          *      Delete the destination
1119          */
1120         __ip_vs_del_dest(dest);
1121
1122         LeaveFunction(2);
1123
1124         return 0;
1125 }
1126
1127
1128 /*
1129  *      Add a service into the service hash table
1130  */
1131 static int
1132 ip_vs_add_service(struct ip_vs_service_user_kern *u,
1133                   struct ip_vs_service **svc_p)
1134 {
1135         int ret = 0;
1136         struct ip_vs_scheduler *sched = NULL;
1137         struct ip_vs_pe *pe = NULL;
1138         struct ip_vs_service *svc = NULL;
1139
1140         /* increase the module use count */
1141         ip_vs_use_count_inc();
1142
1143         /* Lookup the scheduler by 'u->sched_name' */
1144         sched = ip_vs_scheduler_get(u->sched_name);
1145         if (sched == NULL) {
1146                 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1147                 ret = -ENOENT;
1148                 goto out_err;
1149         }
1150
1151         if (u->pe_name && *u->pe_name) {
1152                 pe = ip_vs_pe_get(u->pe_name);
1153                 if (pe == NULL) {
1154                         pr_info("persistence engine module ip_vs_pe_%s "
1155                                 "not found\n", u->pe_name);
1156                         ret = -ENOENT;
1157                         goto out_err;
1158                 }
1159         }
1160
1161 #ifdef CONFIG_IP_VS_IPV6
1162         if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1163                 ret = -EINVAL;
1164                 goto out_err;
1165         }
1166 #endif
1167
1168         svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1169         if (svc == NULL) {
1170                 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1171                 ret = -ENOMEM;
1172                 goto out_err;
1173         }
1174
1175         /* I'm the first user of the service */
1176         atomic_set(&svc->usecnt, 0);
1177         atomic_set(&svc->refcnt, 0);
1178
1179         svc->af = u->af;
1180         svc->protocol = u->protocol;
1181         ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1182         svc->port = u->port;
1183         svc->fwmark = u->fwmark;
1184         svc->flags = u->flags;
1185         svc->timeout = u->timeout * HZ;
1186         svc->netmask = u->netmask;
1187
1188         INIT_LIST_HEAD(&svc->destinations);
1189         rwlock_init(&svc->sched_lock);
1190         spin_lock_init(&svc->stats.lock);
1191
1192         /* Bind the scheduler */
1193         ret = ip_vs_bind_scheduler(svc, sched);
1194         if (ret)
1195                 goto out_err;
1196         sched = NULL;
1197
1198         /* Bind the ct retriever */
1199         ip_vs_bind_pe(svc, pe);
1200         pe = NULL;
1201
1202         /* Update the virtual service counters */
1203         if (svc->port == FTPPORT)
1204                 atomic_inc(&ip_vs_ftpsvc_counter);
1205         else if (svc->port == 0)
1206                 atomic_inc(&ip_vs_nullsvc_counter);
1207
1208         ip_vs_new_estimator(&svc->stats);
1209
1210         /* Count only IPv4 services for old get/setsockopt interface */
1211         if (svc->af == AF_INET)
1212                 ip_vs_num_services++;
1213
1214         /* Hash the service into the service table */
1215         write_lock_bh(&__ip_vs_svc_lock);
1216         ip_vs_svc_hash(svc);
1217         write_unlock_bh(&__ip_vs_svc_lock);
1218
1219         *svc_p = svc;
1220         return 0;
1221
1222  out_err:
1223         if (svc != NULL) {
1224                 ip_vs_unbind_scheduler(svc);
1225                 if (svc->inc) {
1226                         local_bh_disable();
1227                         ip_vs_app_inc_put(svc->inc);
1228                         local_bh_enable();
1229                 }
1230                 kfree(svc);
1231         }
1232         ip_vs_scheduler_put(sched);
1233         ip_vs_pe_put(pe);
1234
1235         /* decrease the module use count */
1236         ip_vs_use_count_dec();
1237
1238         return ret;
1239 }
1240
1241
1242 /*
1243  *      Edit a service and bind it with a new scheduler
1244  */
1245 static int
1246 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1247 {
1248         struct ip_vs_scheduler *sched, *old_sched;
1249         struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1250         int ret = 0;
1251
1252         /*
1253          * Lookup the scheduler, by 'u->sched_name'
1254          */
1255         sched = ip_vs_scheduler_get(u->sched_name);
1256         if (sched == NULL) {
1257                 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1258                 return -ENOENT;
1259         }
1260         old_sched = sched;
1261
1262         if (u->pe_name && *u->pe_name) {
1263                 pe = ip_vs_pe_get(u->pe_name);
1264                 if (pe == NULL) {
1265                         pr_info("persistence engine module ip_vs_pe_%s "
1266                                 "not found\n", u->pe_name);
1267                         ret = -ENOENT;
1268                         goto out;
1269                 }
1270                 old_pe = pe;
1271         }
1272
1273 #ifdef CONFIG_IP_VS_IPV6
1274         if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1275                 ret = -EINVAL;
1276                 goto out;
1277         }
1278 #endif
1279
1280         write_lock_bh(&__ip_vs_svc_lock);
1281
1282         /*
1283          * Wait until all other svc users go away.
1284          */
1285         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1286
1287         /*
1288          * Set the flags and timeout value
1289          */
1290         svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1291         svc->timeout = u->timeout * HZ;
1292         svc->netmask = u->netmask;
1293
1294         old_sched = svc->scheduler;
1295         if (sched != old_sched) {
1296                 /*
1297                  * Unbind the old scheduler
1298                  */
1299                 if ((ret = ip_vs_unbind_scheduler(svc))) {
1300                         old_sched = sched;
1301                         goto out_unlock;
1302                 }
1303
1304                 /*
1305                  * Bind the new scheduler
1306                  */
1307                 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1308                         /*
1309                          * If ip_vs_bind_scheduler fails, restore the old
1310                          * scheduler.
1311                          * The main reason of failure is out of memory.
1312                          *
1313                          * The question is if the old scheduler can be
1314                          * restored all the time. TODO: if it cannot be
1315                          * restored some time, we must delete the service,
1316                          * otherwise the system may crash.
1317                          */
1318                         ip_vs_bind_scheduler(svc, old_sched);
1319                         old_sched = sched;
1320                         goto out_unlock;
1321                 }
1322         }
1323
1324         old_pe = svc->pe;
1325         if (pe != old_pe) {
1326                 ip_vs_unbind_pe(svc);
1327                 ip_vs_bind_pe(svc, pe);
1328         }
1329
1330   out_unlock:
1331         write_unlock_bh(&__ip_vs_svc_lock);
1332   out:
1333         ip_vs_scheduler_put(old_sched);
1334         ip_vs_pe_put(old_pe);
1335         return ret;
1336 }
1337
1338
1339 /*
1340  *      Delete a service from the service list
1341  *      - The service must be unlinked, unlocked and not referenced!
1342  *      - We are called under _bh lock
1343  */
1344 static void __ip_vs_del_service(struct ip_vs_service *svc)
1345 {
1346         struct ip_vs_dest *dest, *nxt;
1347         struct ip_vs_scheduler *old_sched;
1348         struct ip_vs_pe *old_pe;
1349
1350         pr_info("%s: enter\n", __func__);
1351
1352         /* Count only IPv4 services for old get/setsockopt interface */
1353         if (svc->af == AF_INET)
1354                 ip_vs_num_services--;
1355
1356         ip_vs_kill_estimator(&svc->stats);
1357
1358         /* Unbind scheduler */
1359         old_sched = svc->scheduler;
1360         ip_vs_unbind_scheduler(svc);
1361         ip_vs_scheduler_put(old_sched);
1362
1363         /* Unbind persistence engine */
1364         old_pe = svc->pe;
1365         ip_vs_unbind_pe(svc);
1366         ip_vs_pe_put(old_pe);
1367
1368         /* Unbind app inc */
1369         if (svc->inc) {
1370                 ip_vs_app_inc_put(svc->inc);
1371                 svc->inc = NULL;
1372         }
1373
1374         /*
1375          *    Unlink the whole destination list
1376          */
1377         list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1378                 __ip_vs_unlink_dest(svc, dest, 0);
1379                 __ip_vs_del_dest(dest);
1380         }
1381
1382         /*
1383          *    Update the virtual service counters
1384          */
1385         if (svc->port == FTPPORT)
1386                 atomic_dec(&ip_vs_ftpsvc_counter);
1387         else if (svc->port == 0)
1388                 atomic_dec(&ip_vs_nullsvc_counter);
1389
1390         /*
1391          *    Free the service if nobody refers to it
1392          */
1393         if (atomic_read(&svc->refcnt) == 0) {
1394                 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1395                               svc->fwmark,
1396                               IP_VS_DBG_ADDR(svc->af, &svc->addr),
1397                               ntohs(svc->port), atomic_read(&svc->usecnt));
1398                 kfree(svc);
1399         }
1400
1401         /* decrease the module use count */
1402         ip_vs_use_count_dec();
1403 }
1404
1405 /*
1406  * Unlink a service from list and try to delete it if its refcnt reached 0
1407  */
1408 static void ip_vs_unlink_service(struct ip_vs_service *svc)
1409 {
1410         /*
1411          * Unhash it from the service table
1412          */
1413         write_lock_bh(&__ip_vs_svc_lock);
1414
1415         ip_vs_svc_unhash(svc);
1416
1417         /*
1418          * Wait until all the svc users go away.
1419          */
1420         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1421
1422         __ip_vs_del_service(svc);
1423
1424         write_unlock_bh(&__ip_vs_svc_lock);
1425 }
1426
1427 /*
1428  *      Delete a service from the service list
1429  */
1430 static int ip_vs_del_service(struct ip_vs_service *svc)
1431 {
1432         if (svc == NULL)
1433                 return -EEXIST;
1434         ip_vs_unlink_service(svc);
1435
1436         return 0;
1437 }
1438
1439
1440 /*
1441  *      Flush all the virtual services
1442  */
1443 static int ip_vs_flush(void)
1444 {
1445         int idx;
1446         struct ip_vs_service *svc, *nxt;
1447
1448         /*
1449          * Flush the service table hashed by <protocol,addr,port>
1450          */
1451         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1452                 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1453                         ip_vs_unlink_service(svc);
1454                 }
1455         }
1456
1457         /*
1458          * Flush the service table hashed by fwmark
1459          */
1460         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1461                 list_for_each_entry_safe(svc, nxt,
1462                                          &ip_vs_svc_fwm_table[idx], f_list) {
1463                         ip_vs_unlink_service(svc);
1464                 }
1465         }
1466
1467         return 0;
1468 }
1469
1470
1471 /*
1472  *      Zero counters in a service or all services
1473  */
1474 static int ip_vs_zero_service(struct ip_vs_service *svc)
1475 {
1476         struct ip_vs_dest *dest;
1477
1478         write_lock_bh(&__ip_vs_svc_lock);
1479         list_for_each_entry(dest, &svc->destinations, n_list) {
1480                 ip_vs_zero_stats(&dest->stats);
1481         }
1482         ip_vs_zero_stats(&svc->stats);
1483         write_unlock_bh(&__ip_vs_svc_lock);
1484         return 0;
1485 }
1486
1487 static int ip_vs_zero_all(void)
1488 {
1489         int idx;
1490         struct ip_vs_service *svc;
1491
1492         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1493                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1494                         ip_vs_zero_service(svc);
1495                 }
1496         }
1497
1498         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1499                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1500                         ip_vs_zero_service(svc);
1501                 }
1502         }
1503
1504         ip_vs_zero_stats(&ip_vs_stats);
1505         return 0;
1506 }
1507
1508
1509 static int
1510 proc_do_defense_mode(ctl_table *table, int write,
1511                      void __user *buffer, size_t *lenp, loff_t *ppos)
1512 {
1513         int *valp = table->data;
1514         int val = *valp;
1515         int rc;
1516
1517         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1518         if (write && (*valp != val)) {
1519                 if ((*valp < 0) || (*valp > 3)) {
1520                         /* Restore the correct value */
1521                         *valp = val;
1522                 } else {
1523                         update_defense_level();
1524                 }
1525         }
1526         return rc;
1527 }
1528
1529
1530 static int
1531 proc_do_sync_threshold(ctl_table *table, int write,
1532                        void __user *buffer, size_t *lenp, loff_t *ppos)
1533 {
1534         int *valp = table->data;
1535         int val[2];
1536         int rc;
1537
1538         /* backup the value first */
1539         memcpy(val, valp, sizeof(val));
1540
1541         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1542         if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1543                 /* Restore the correct value */
1544                 memcpy(valp, val, sizeof(val));
1545         }
1546         return rc;
1547 }
1548
1549
1550 /*
1551  *      IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1552  */
1553
1554 static struct ctl_table vs_vars[] = {
1555         {
1556                 .procname       = "amemthresh",
1557                 .data           = &sysctl_ip_vs_amemthresh,
1558                 .maxlen         = sizeof(int),
1559                 .mode           = 0644,
1560                 .proc_handler   = proc_dointvec,
1561         },
1562 #ifdef CONFIG_IP_VS_DEBUG
1563         {
1564                 .procname       = "debug_level",
1565                 .data           = &sysctl_ip_vs_debug_level,
1566                 .maxlen         = sizeof(int),
1567                 .mode           = 0644,
1568                 .proc_handler   = proc_dointvec,
1569         },
1570 #endif
1571         {
1572                 .procname       = "am_droprate",
1573                 .data           = &sysctl_ip_vs_am_droprate,
1574                 .maxlen         = sizeof(int),
1575                 .mode           = 0644,
1576                 .proc_handler   = proc_dointvec,
1577         },
1578         {
1579                 .procname       = "drop_entry",
1580                 .data           = &sysctl_ip_vs_drop_entry,
1581                 .maxlen         = sizeof(int),
1582                 .mode           = 0644,
1583                 .proc_handler   = proc_do_defense_mode,
1584         },
1585         {
1586                 .procname       = "drop_packet",
1587                 .data           = &sysctl_ip_vs_drop_packet,
1588                 .maxlen         = sizeof(int),
1589                 .mode           = 0644,
1590                 .proc_handler   = proc_do_defense_mode,
1591         },
1592 #ifdef CONFIG_IP_VS_NFCT
1593         {
1594                 .procname       = "conntrack",
1595                 .data           = &sysctl_ip_vs_conntrack,
1596                 .maxlen         = sizeof(int),
1597                 .mode           = 0644,
1598                 .proc_handler   = &proc_dointvec,
1599         },
1600 #endif
1601         {
1602                 .procname       = "secure_tcp",
1603                 .data           = &sysctl_ip_vs_secure_tcp,
1604                 .maxlen         = sizeof(int),
1605                 .mode           = 0644,
1606                 .proc_handler   = proc_do_defense_mode,
1607         },
1608         {
1609                 .procname       = "snat_reroute",
1610                 .data           = &sysctl_ip_vs_snat_reroute,
1611                 .maxlen         = sizeof(int),
1612                 .mode           = 0644,
1613                 .proc_handler   = &proc_dointvec,
1614         },
1615 #if 0
1616         {
1617                 .procname       = "timeout_established",
1618                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1619                 .maxlen         = sizeof(int),
1620                 .mode           = 0644,
1621                 .proc_handler   = proc_dointvec_jiffies,
1622         },
1623         {
1624                 .procname       = "timeout_synsent",
1625                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1626                 .maxlen         = sizeof(int),
1627                 .mode           = 0644,
1628                 .proc_handler   = proc_dointvec_jiffies,
1629         },
1630         {
1631                 .procname       = "timeout_synrecv",
1632                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1633                 .maxlen         = sizeof(int),
1634                 .mode           = 0644,
1635                 .proc_handler   = proc_dointvec_jiffies,
1636         },
1637         {
1638                 .procname       = "timeout_finwait",
1639                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1640                 .maxlen         = sizeof(int),
1641                 .mode           = 0644,
1642                 .proc_handler   = proc_dointvec_jiffies,
1643         },
1644         {
1645                 .procname       = "timeout_timewait",
1646                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1647                 .maxlen         = sizeof(int),
1648                 .mode           = 0644,
1649                 .proc_handler   = proc_dointvec_jiffies,
1650         },
1651         {
1652                 .procname       = "timeout_close",
1653                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1654                 .maxlen         = sizeof(int),
1655                 .mode           = 0644,
1656                 .proc_handler   = proc_dointvec_jiffies,
1657         },
1658         {
1659                 .procname       = "timeout_closewait",
1660                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1661                 .maxlen         = sizeof(int),
1662                 .mode           = 0644,
1663                 .proc_handler   = proc_dointvec_jiffies,
1664         },
1665         {
1666                 .procname       = "timeout_lastack",
1667                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1668                 .maxlen         = sizeof(int),
1669                 .mode           = 0644,
1670                 .proc_handler   = proc_dointvec_jiffies,
1671         },
1672         {
1673                 .procname       = "timeout_listen",
1674                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1675                 .maxlen         = sizeof(int),
1676                 .mode           = 0644,
1677                 .proc_handler   = proc_dointvec_jiffies,
1678         },
1679         {
1680                 .procname       = "timeout_synack",
1681                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1682                 .maxlen         = sizeof(int),
1683                 .mode           = 0644,
1684                 .proc_handler   = proc_dointvec_jiffies,
1685         },
1686         {
1687                 .procname       = "timeout_udp",
1688                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1689                 .maxlen         = sizeof(int),
1690                 .mode           = 0644,
1691                 .proc_handler   = proc_dointvec_jiffies,
1692         },
1693         {
1694                 .procname       = "timeout_icmp",
1695                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1696                 .maxlen         = sizeof(int),
1697                 .mode           = 0644,
1698                 .proc_handler   = proc_dointvec_jiffies,
1699         },
1700 #endif
1701         {
1702                 .procname       = "cache_bypass",
1703                 .data           = &sysctl_ip_vs_cache_bypass,
1704                 .maxlen         = sizeof(int),
1705                 .mode           = 0644,
1706                 .proc_handler   = proc_dointvec,
1707         },
1708         {
1709                 .procname       = "expire_nodest_conn",
1710                 .data           = &sysctl_ip_vs_expire_nodest_conn,
1711                 .maxlen         = sizeof(int),
1712                 .mode           = 0644,
1713                 .proc_handler   = proc_dointvec,
1714         },
1715         {
1716                 .procname       = "expire_quiescent_template",
1717                 .data           = &sysctl_ip_vs_expire_quiescent_template,
1718                 .maxlen         = sizeof(int),
1719                 .mode           = 0644,
1720                 .proc_handler   = proc_dointvec,
1721         },
1722         {
1723                 .procname       = "sync_threshold",
1724                 .data           = &sysctl_ip_vs_sync_threshold,
1725                 .maxlen         = sizeof(sysctl_ip_vs_sync_threshold),
1726                 .mode           = 0644,
1727                 .proc_handler   = proc_do_sync_threshold,
1728         },
1729         {
1730                 .procname       = "nat_icmp_send",
1731                 .data           = &sysctl_ip_vs_nat_icmp_send,
1732                 .maxlen         = sizeof(int),
1733                 .mode           = 0644,
1734                 .proc_handler   = proc_dointvec,
1735         },
1736         { }
1737 };
1738
1739 const struct ctl_path net_vs_ctl_path[] = {
1740         { .procname = "net", },
1741         { .procname = "ipv4", },
1742         { .procname = "vs", },
1743         { }
1744 };
1745 EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1746
1747 static struct ctl_table_header * sysctl_header;
1748
1749 #ifdef CONFIG_PROC_FS
1750
/* Position of the seq_file walk over the two service hash tables */
struct ip_vs_iter {
	struct list_head *table;	/* ip_vs_svc_table or ip_vs_svc_fwm_table */
	int bucket;			/* index of the current bucket in 'table' */
};
1755
1756 /*
1757  *      Write the contents of the VS rule table to a PROCfs file.
1758  *      (It is kept just for backward compatibility)
1759  */
1760 static inline const char *ip_vs_fwd_name(unsigned flags)
1761 {
1762         switch (flags & IP_VS_CONN_F_FWD_MASK) {
1763         case IP_VS_CONN_F_LOCALNODE:
1764                 return "Local";
1765         case IP_VS_CONN_F_TUNNEL:
1766                 return "Tunnel";
1767         case IP_VS_CONN_F_DROUTE:
1768                 return "Route";
1769         default:
1770                 return "Masq";
1771         }
1772 }
1773
1774
1775 /* Get the Nth entry in the two lists */
1776 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1777 {
1778         struct ip_vs_iter *iter = seq->private;
1779         int idx;
1780         struct ip_vs_service *svc;
1781
1782         /* look in hash by protocol */
1783         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1784                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1785                         if (pos-- == 0){
1786                                 iter->table = ip_vs_svc_table;
1787                                 iter->bucket = idx;
1788                                 return svc;
1789                         }
1790                 }
1791         }
1792
1793         /* keep looking in fwmark */
1794         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1795                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1796                         if (pos-- == 0) {
1797                                 iter->table = ip_vs_svc_fwm_table;
1798                                 iter->bucket = idx;
1799                                 return svc;
1800                         }
1801                 }
1802         }
1803
1804         return NULL;
1805 }
1806
1807 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1808 __acquires(__ip_vs_svc_lock)
1809 {
1810
1811         read_lock_bh(&__ip_vs_svc_lock);
1812         return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1813 }
1814
1815
1816 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1817 {
1818         struct list_head *e;
1819         struct ip_vs_iter *iter;
1820         struct ip_vs_service *svc;
1821
1822         ++*pos;
1823         if (v == SEQ_START_TOKEN)
1824                 return ip_vs_info_array(seq,0);
1825
1826         svc = v;
1827         iter = seq->private;
1828
1829         if (iter->table == ip_vs_svc_table) {
1830                 /* next service in table hashed by protocol */
1831                 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1832                         return list_entry(e, struct ip_vs_service, s_list);
1833
1834
1835                 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1836                         list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1837                                             s_list) {
1838                                 return svc;
1839                         }
1840                 }
1841
1842                 iter->table = ip_vs_svc_fwm_table;
1843                 iter->bucket = -1;
1844                 goto scan_fwmark;
1845         }
1846
1847         /* next service in hashed by fwmark */
1848         if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1849                 return list_entry(e, struct ip_vs_service, f_list);
1850
1851  scan_fwmark:
1852         while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1853                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1854                                     f_list)
1855                         return svc;
1856         }
1857
1858         return NULL;
1859 }
1860
1861 static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1862 __releases(__ip_vs_svc_lock)
1863 {
1864         read_unlock_bh(&__ip_vs_svc_lock);
1865 }
1866
1867
1868 static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1869 {
1870         if (v == SEQ_START_TOKEN) {
1871                 seq_printf(seq,
1872                         "IP Virtual Server version %d.%d.%d (size=%d)\n",
1873                         NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1874                 seq_puts(seq,
1875                          "Prot LocalAddress:Port Scheduler Flags\n");
1876                 seq_puts(seq,
1877                          "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1878         } else {
1879                 const struct ip_vs_service *svc = v;
1880                 const struct ip_vs_iter *iter = seq->private;
1881                 const struct ip_vs_dest *dest;
1882
1883                 if (iter->table == ip_vs_svc_table) {
1884 #ifdef CONFIG_IP_VS_IPV6
1885                         if (svc->af == AF_INET6)
1886                                 seq_printf(seq, "%s  [%pI6]:%04X %s ",
1887                                            ip_vs_proto_name(svc->protocol),
1888                                            &svc->addr.in6,
1889                                            ntohs(svc->port),
1890                                            svc->scheduler->name);
1891                         else
1892 #endif
1893                                 seq_printf(seq, "%s  %08X:%04X %s %s ",
1894                                            ip_vs_proto_name(svc->protocol),
1895                                            ntohl(svc->addr.ip),
1896                                            ntohs(svc->port),
1897                                            svc->scheduler->name,
1898                                            (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
1899                 } else {
1900                         seq_printf(seq, "FWM  %08X %s %s",
1901                                    svc->fwmark, svc->scheduler->name,
1902                                    (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
1903                 }
1904
1905                 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1906                         seq_printf(seq, "persistent %d %08X\n",
1907                                 svc->timeout,
1908                                 ntohl(svc->netmask));
1909                 else
1910                         seq_putc(seq, '\n');
1911
1912                 list_for_each_entry(dest, &svc->destinations, n_list) {
1913 #ifdef CONFIG_IP_VS_IPV6
1914                         if (dest->af == AF_INET6)
1915                                 seq_printf(seq,
1916                                            "  -> [%pI6]:%04X"
1917                                            "      %-7s %-6d %-10d %-10d\n",
1918                                            &dest->addr.in6,
1919                                            ntohs(dest->port),
1920                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1921                                            atomic_read(&dest->weight),
1922                                            atomic_read(&dest->activeconns),
1923                                            atomic_read(&dest->inactconns));
1924                         else
1925 #endif
1926                                 seq_printf(seq,
1927                                            "  -> %08X:%04X      "
1928                                            "%-7s %-6d %-10d %-10d\n",
1929                                            ntohl(dest->addr.ip),
1930                                            ntohs(dest->port),
1931                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1932                                            atomic_read(&dest->weight),
1933                                            atomic_read(&dest->activeconns),
1934                                            atomic_read(&dest->inactconns));
1935
1936                 }
1937         }
1938         return 0;
1939 }
1940
1941 static const struct seq_operations ip_vs_info_seq_ops = {
1942         .start = ip_vs_info_seq_start,
1943         .next  = ip_vs_info_seq_next,
1944         .stop  = ip_vs_info_seq_stop,
1945         .show  = ip_vs_info_seq_show,
1946 };
1947
1948 static int ip_vs_info_open(struct inode *inode, struct file *file)
1949 {
1950         return seq_open_private(file, &ip_vs_info_seq_ops,
1951                         sizeof(struct ip_vs_iter));
1952 }
1953
1954 static const struct file_operations ip_vs_info_fops = {
1955         .owner   = THIS_MODULE,
1956         .open    = ip_vs_info_open,
1957         .read    = seq_read,
1958         .llseek  = seq_lseek,
1959         .release = seq_release_private,
1960 };
1961
1962 #endif
1963
1964 struct ip_vs_stats ip_vs_stats = {
1965         .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
1966 };
1967
1968 #ifdef CONFIG_PROC_FS
1969 static int ip_vs_stats_show(struct seq_file *seq, void *v)
1970 {
1971
1972 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
1973         seq_puts(seq,
1974                  "   Total Incoming Outgoing         Incoming         Outgoing\n");
1975         seq_printf(seq,
1976                    "   Conns  Packets  Packets            Bytes            Bytes\n");
1977
1978         spin_lock_bh(&ip_vs_stats.lock);
1979         seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,
1980                    ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,
1981                    (unsigned long long) ip_vs_stats.ustats.inbytes,
1982                    (unsigned long long) ip_vs_stats.ustats.outbytes);
1983
1984 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1985         seq_puts(seq,
1986                    " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
1987         seq_printf(seq,"%8X %8X %8X %16X %16X\n",
1988                         ip_vs_stats.ustats.cps,
1989                         ip_vs_stats.ustats.inpps,
1990                         ip_vs_stats.ustats.outpps,
1991                         ip_vs_stats.ustats.inbps,
1992                         ip_vs_stats.ustats.outbps);
1993         spin_unlock_bh(&ip_vs_stats.lock);
1994
1995         return 0;
1996 }
1997
1998 static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1999 {
2000         return single_open(file, ip_vs_stats_show, NULL);
2001 }
2002
2003 static const struct file_operations ip_vs_stats_fops = {
2004         .owner = THIS_MODULE,
2005         .open = ip_vs_stats_seq_open,
2006         .read = seq_read,
2007         .llseek = seq_lseek,
2008         .release = single_release,
2009 };
2010
2011 #endif
2012
2013 /*
2014  *      Set timeout values for tcp tcpfin udp in the timeout_table.
2015  */
2016 static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
2017 {
2018         IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2019                   u->tcp_timeout,
2020                   u->tcp_fin_timeout,
2021                   u->udp_timeout);
2022
2023 #ifdef CONFIG_IP_VS_PROTO_TCP
2024         if (u->tcp_timeout) {
2025                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
2026                         = u->tcp_timeout * HZ;
2027         }
2028
2029         if (u->tcp_fin_timeout) {
2030                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
2031                         = u->tcp_fin_timeout * HZ;
2032         }
2033 #endif
2034
2035 #ifdef CONFIG_IP_VS_PROTO_UDP
2036         if (u->udp_timeout) {
2037                 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
2038                         = u->udp_timeout * HZ;
2039         }
2040 #endif
2041         return 0;
2042 }
2043
2044
2045 #define SET_CMDID(cmd)          (cmd - IP_VS_BASE_CTL)
2046 #define SERVICE_ARG_LEN         (sizeof(struct ip_vs_service_user))
2047 #define SVCDEST_ARG_LEN         (sizeof(struct ip_vs_service_user) +    \
2048                                  sizeof(struct ip_vs_dest_user))
2049 #define TIMEOUT_ARG_LEN         (sizeof(struct ip_vs_timeout_user))
2050 #define DAEMON_ARG_LEN          (sizeof(struct ip_vs_daemon_user))
2051 #define MAX_ARG_LEN             SVCDEST_ARG_LEN
2052
2053 static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
2054         [SET_CMDID(IP_VS_SO_SET_ADD)]           = SERVICE_ARG_LEN,
2055         [SET_CMDID(IP_VS_SO_SET_EDIT)]          = SERVICE_ARG_LEN,
2056         [SET_CMDID(IP_VS_SO_SET_DEL)]           = SERVICE_ARG_LEN,
2057         [SET_CMDID(IP_VS_SO_SET_FLUSH)]         = 0,
2058         [SET_CMDID(IP_VS_SO_SET_ADDDEST)]       = SVCDEST_ARG_LEN,
2059         [SET_CMDID(IP_VS_SO_SET_DELDEST)]       = SVCDEST_ARG_LEN,
2060         [SET_CMDID(IP_VS_SO_SET_EDITDEST)]      = SVCDEST_ARG_LEN,
2061         [SET_CMDID(IP_VS_SO_SET_TIMEOUT)]       = TIMEOUT_ARG_LEN,
2062         [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)]   = DAEMON_ARG_LEN,
2063         [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)]    = DAEMON_ARG_LEN,
2064         [SET_CMDID(IP_VS_SO_SET_ZERO)]          = SERVICE_ARG_LEN,
2065 };
2066
2067 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2068                                   struct ip_vs_service_user *usvc_compat)
2069 {
2070         memset(usvc, 0, sizeof(*usvc));
2071
2072         usvc->af                = AF_INET;
2073         usvc->protocol          = usvc_compat->protocol;
2074         usvc->addr.ip           = usvc_compat->addr;
2075         usvc->port              = usvc_compat->port;
2076         usvc->fwmark            = usvc_compat->fwmark;
2077
2078         /* Deep copy of sched_name is not needed here */
2079         usvc->sched_name        = usvc_compat->sched_name;
2080
2081         usvc->flags             = usvc_compat->flags;
2082         usvc->timeout           = usvc_compat->timeout;
2083         usvc->netmask           = usvc_compat->netmask;
2084 }
2085
2086 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2087                                    struct ip_vs_dest_user *udest_compat)
2088 {
2089         memset(udest, 0, sizeof(*udest));
2090
2091         udest->addr.ip          = udest_compat->addr;
2092         udest->port             = udest_compat->port;
2093         udest->conn_flags       = udest_compat->conn_flags;
2094         udest->weight           = udest_compat->weight;
2095         udest->u_threshold      = udest_compat->u_threshold;
2096         udest->l_threshold      = udest_compat->l_threshold;
2097 }
2098
2099 static int
2100 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2101 {
2102         int ret;
2103         unsigned char arg[MAX_ARG_LEN];
2104         struct ip_vs_service_user *usvc_compat;
2105         struct ip_vs_service_user_kern usvc;
2106         struct ip_vs_service *svc;
2107         struct ip_vs_dest_user *udest_compat;
2108         struct ip_vs_dest_user_kern udest;
2109
2110         if (!capable(CAP_NET_ADMIN))
2111                 return -EPERM;
2112
2113         if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2114                 return -EINVAL;
2115         if (len < 0 || len >  MAX_ARG_LEN)
2116                 return -EINVAL;
2117         if (len != set_arglen[SET_CMDID(cmd)]) {
2118                 pr_err("set_ctl: len %u != %u\n",
2119                        len, set_arglen[SET_CMDID(cmd)]);
2120                 return -EINVAL;
2121         }
2122
2123         if (copy_from_user(arg, user, len) != 0)
2124                 return -EFAULT;
2125
2126         /* increase the module use count */
2127         ip_vs_use_count_inc();
2128
2129         if (mutex_lock_interruptible(&__ip_vs_mutex)) {
2130                 ret = -ERESTARTSYS;
2131                 goto out_dec;
2132         }
2133
2134         if (cmd == IP_VS_SO_SET_FLUSH) {
2135                 /* Flush the virtual service */
2136                 ret = ip_vs_flush();
2137                 goto out_unlock;
2138         } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2139                 /* Set timeout values for (tcp tcpfin udp) */
2140                 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
2141                 goto out_unlock;
2142         } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2143                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2144                 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
2145                 goto out_unlock;
2146         } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2147                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2148                 ret = stop_sync_thread(dm->state);
2149                 goto out_unlock;
2150         }
2151
2152         usvc_compat = (struct ip_vs_service_user *)arg;
2153         udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2154
2155         /* We only use the new structs internally, so copy userspace compat
2156          * structs to extended internal versions */
2157         ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2158         ip_vs_copy_udest_compat(&udest, udest_compat);
2159
2160         if (cmd == IP_VS_SO_SET_ZERO) {
2161                 /* if no service address is set, zero counters in all */
2162                 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2163                         ret = ip_vs_zero_all();
2164                         goto out_unlock;
2165                 }
2166         }
2167
2168         /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2169         if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2170             usvc.protocol != IPPROTO_SCTP) {
2171                 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2172                        usvc.protocol, &usvc.addr.ip,
2173                        ntohs(usvc.port), usvc.sched_name);
2174                 ret = -EFAULT;
2175                 goto out_unlock;
2176         }
2177
2178         /* Lookup the exact service by <protocol, addr, port> or fwmark */
2179         if (usvc.fwmark == 0)
2180                 svc = __ip_vs_service_find(usvc.af, usvc.protocol,
2181                                            &usvc.addr, usvc.port);
2182         else
2183                 svc = __ip_vs_svc_fwm_find(usvc.af, usvc.fwmark);
2184
2185         if (cmd != IP_VS_SO_SET_ADD
2186             && (svc == NULL || svc->protocol != usvc.protocol)) {
2187                 ret = -ESRCH;
2188                 goto out_unlock;
2189         }
2190
2191         switch (cmd) {
2192         case IP_VS_SO_SET_ADD:
2193                 if (svc != NULL)
2194                         ret = -EEXIST;
2195                 else
2196                         ret = ip_vs_add_service(&usvc, &svc);
2197                 break;
2198         case IP_VS_SO_SET_EDIT:
2199                 ret = ip_vs_edit_service(svc, &usvc);
2200                 break;
2201         case IP_VS_SO_SET_DEL:
2202                 ret = ip_vs_del_service(svc);
2203                 if (!ret)
2204                         goto out_unlock;
2205                 break;
2206         case IP_VS_SO_SET_ZERO:
2207                 ret = ip_vs_zero_service(svc);
2208                 break;
2209         case IP_VS_SO_SET_ADDDEST:
2210                 ret = ip_vs_add_dest(svc, &udest);
2211                 break;
2212         case IP_VS_SO_SET_EDITDEST:
2213                 ret = ip_vs_edit_dest(svc, &udest);
2214                 break;
2215         case IP_VS_SO_SET_DELDEST:
2216                 ret = ip_vs_del_dest(svc, &udest);
2217                 break;
2218         default:
2219                 ret = -EINVAL;
2220         }
2221
2222   out_unlock:
2223         mutex_unlock(&__ip_vs_mutex);
2224   out_dec:
2225         /* decrease the module use count */
2226         ip_vs_use_count_dec();
2227
2228         return ret;
2229 }
2230
2231
2232 static void
2233 ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2234 {
2235         spin_lock_bh(&src->lock);
2236         memcpy(dst, &src->ustats, sizeof(*dst));
2237         spin_unlock_bh(&src->lock);
2238 }
2239
2240 static void
2241 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2242 {
2243         dst->protocol = src->protocol;
2244         dst->addr = src->addr.ip;
2245         dst->port = src->port;
2246         dst->fwmark = src->fwmark;
2247         strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2248         dst->flags = src->flags;
2249         dst->timeout = src->timeout / HZ;
2250         dst->netmask = src->netmask;
2251         dst->num_dests = src->num_dests;
2252         ip_vs_copy_stats(&dst->stats, &src->stats);
2253 }
2254
2255 static inline int
2256 __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2257                             struct ip_vs_get_services __user *uptr)
2258 {
2259         int idx, count=0;
2260         struct ip_vs_service *svc;
2261         struct ip_vs_service_entry entry;
2262         int ret = 0;
2263
2264         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2265                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2266                         /* Only expose IPv4 entries to old interface */
2267                         if (svc->af != AF_INET)
2268                                 continue;
2269
2270                         if (count >= get->num_services)
2271                                 goto out;
2272                         memset(&entry, 0, sizeof(entry));
2273                         ip_vs_copy_service(&entry, svc);
2274                         if (copy_to_user(&uptr->entrytable[count],
2275                                          &entry, sizeof(entry))) {
2276                                 ret = -EFAULT;
2277                                 goto out;
2278                         }
2279                         count++;
2280                 }
2281         }
2282
2283         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2284                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2285                         /* Only expose IPv4 entries to old interface */
2286                         if (svc->af != AF_INET)
2287                                 continue;
2288
2289                         if (count >= get->num_services)
2290                                 goto out;
2291                         memset(&entry, 0, sizeof(entry));
2292                         ip_vs_copy_service(&entry, svc);
2293                         if (copy_to_user(&uptr->entrytable[count],
2294                                          &entry, sizeof(entry))) {
2295                                 ret = -EFAULT;
2296                                 goto out;
2297                         }
2298                         count++;
2299                 }
2300         }
2301   out:
2302         return ret;
2303 }
2304
2305 static inline int
2306 __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2307                          struct ip_vs_get_dests __user *uptr)
2308 {
2309         struct ip_vs_service *svc;
2310         union nf_inet_addr addr = { .ip = get->addr };
2311         int ret = 0;
2312
2313         if (get->fwmark)
2314                 svc = __ip_vs_svc_fwm_find(AF_INET, get->fwmark);
2315         else
2316                 svc = __ip_vs_service_find(AF_INET, get->protocol, &addr,
2317                                            get->port);
2318
2319         if (svc) {
2320                 int count = 0;
2321                 struct ip_vs_dest *dest;
2322                 struct ip_vs_dest_entry entry;
2323
2324                 list_for_each_entry(dest, &svc->destinations, n_list) {
2325                         if (count >= get->num_dests)
2326                                 break;
2327
2328                         entry.addr = dest->addr.ip;
2329                         entry.port = dest->port;
2330                         entry.conn_flags = atomic_read(&dest->conn_flags);
2331                         entry.weight = atomic_read(&dest->weight);
2332                         entry.u_threshold = dest->u_threshold;
2333                         entry.l_threshold = dest->l_threshold;
2334                         entry.activeconns = atomic_read(&dest->activeconns);
2335                         entry.inactconns = atomic_read(&dest->inactconns);
2336                         entry.persistconns = atomic_read(&dest->persistconns);
2337                         ip_vs_copy_stats(&entry.stats, &dest->stats);
2338                         if (copy_to_user(&uptr->entrytable[count],
2339                                          &entry, sizeof(entry))) {
2340                                 ret = -EFAULT;
2341                                 break;
2342                         }
2343                         count++;
2344                 }
2345         } else
2346                 ret = -ESRCH;
2347         return ret;
2348 }
2349
2350 static inline void
2351 __ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2352 {
2353 #ifdef CONFIG_IP_VS_PROTO_TCP
2354         u->tcp_timeout =
2355                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2356         u->tcp_fin_timeout =
2357                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2358 #endif
2359 #ifdef CONFIG_IP_VS_PROTO_UDP
2360         u->udp_timeout =
2361                 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2362 #endif
2363 }
2364
2365
2366 #define GET_CMDID(cmd)          (cmd - IP_VS_BASE_CTL)
2367 #define GET_INFO_ARG_LEN        (sizeof(struct ip_vs_getinfo))
2368 #define GET_SERVICES_ARG_LEN    (sizeof(struct ip_vs_get_services))
2369 #define GET_SERVICE_ARG_LEN     (sizeof(struct ip_vs_service_entry))
2370 #define GET_DESTS_ARG_LEN       (sizeof(struct ip_vs_get_dests))
2371 #define GET_TIMEOUT_ARG_LEN     (sizeof(struct ip_vs_timeout_user))
2372 #define GET_DAEMON_ARG_LEN      (sizeof(struct ip_vs_daemon_user) * 2)
2373
2374 static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
2375         [GET_CMDID(IP_VS_SO_GET_VERSION)]       = 64,
2376         [GET_CMDID(IP_VS_SO_GET_INFO)]          = GET_INFO_ARG_LEN,
2377         [GET_CMDID(IP_VS_SO_GET_SERVICES)]      = GET_SERVICES_ARG_LEN,
2378         [GET_CMDID(IP_VS_SO_GET_SERVICE)]       = GET_SERVICE_ARG_LEN,
2379         [GET_CMDID(IP_VS_SO_GET_DESTS)]         = GET_DESTS_ARG_LEN,
2380         [GET_CMDID(IP_VS_SO_GET_TIMEOUT)]       = GET_TIMEOUT_ARG_LEN,
2381         [GET_CMDID(IP_VS_SO_GET_DAEMON)]        = GET_DAEMON_ARG_LEN,
2382 };
2383
2384 static int
2385 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2386 {
2387         unsigned char arg[128];
2388         int ret = 0;
2389         unsigned int copylen;
2390
2391         if (!capable(CAP_NET_ADMIN))
2392                 return -EPERM;
2393
2394         if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2395                 return -EINVAL;
2396
2397         if (*len < get_arglen[GET_CMDID(cmd)]) {
2398                 pr_err("get_ctl: len %u < %u\n",
2399                        *len, get_arglen[GET_CMDID(cmd)]);
2400                 return -EINVAL;
2401         }
2402
2403         copylen = get_arglen[GET_CMDID(cmd)];
2404         if (copylen > 128)
2405                 return -EINVAL;
2406
2407         if (copy_from_user(arg, user, copylen) != 0)
2408                 return -EFAULT;
2409
2410         if (mutex_lock_interruptible(&__ip_vs_mutex))
2411                 return -ERESTARTSYS;
2412
2413         switch (cmd) {
2414         case IP_VS_SO_GET_VERSION:
2415         {
2416                 char buf[64];
2417
2418                 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2419                         NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2420                 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2421                         ret = -EFAULT;
2422                         goto out;
2423                 }
2424                 *len = strlen(buf)+1;
2425         }
2426         break;
2427
2428         case IP_VS_SO_GET_INFO:
2429         {
2430                 struct ip_vs_getinfo info;
2431                 info.version = IP_VS_VERSION_CODE;
2432                 info.size = ip_vs_conn_tab_size;
2433                 info.num_services = ip_vs_num_services;
2434                 if (copy_to_user(user, &info, sizeof(info)) != 0)
2435                         ret = -EFAULT;
2436         }
2437         break;
2438
2439         case IP_VS_SO_GET_SERVICES:
2440         {
2441                 struct ip_vs_get_services *get;
2442                 int size;
2443
2444                 get = (struct ip_vs_get_services *)arg;
2445                 size = sizeof(*get) +
2446                         sizeof(struct ip_vs_service_entry) * get->num_services;
2447                 if (*len != size) {
2448                         pr_err("length: %u != %u\n", *len, size);
2449                         ret = -EINVAL;
2450                         goto out;
2451                 }
2452                 ret = __ip_vs_get_service_entries(get, user);
2453         }
2454         break;
2455
2456         case IP_VS_SO_GET_SERVICE:
2457         {
2458                 struct ip_vs_service_entry *entry;
2459                 struct ip_vs_service *svc;
2460                 union nf_inet_addr addr;
2461
2462                 entry = (struct ip_vs_service_entry *)arg;
2463                 addr.ip = entry->addr;
2464                 if (entry->fwmark)
2465                         svc = __ip_vs_svc_fwm_find(AF_INET, entry->fwmark);
2466                 else
2467                         svc = __ip_vs_service_find(AF_INET, entry->protocol,
2468                                                    &addr, entry->port);
2469                 if (svc) {
2470                         ip_vs_copy_service(entry, svc);
2471                         if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2472                                 ret = -EFAULT;
2473                 } else
2474                         ret = -ESRCH;
2475         }
2476         break;
2477
2478         case IP_VS_SO_GET_DESTS:
2479         {
2480                 struct ip_vs_get_dests *get;
2481                 int size;
2482
2483                 get = (struct ip_vs_get_dests *)arg;
2484                 size = sizeof(*get) +
2485                         sizeof(struct ip_vs_dest_entry) * get->num_dests;
2486                 if (*len != size) {
2487                         pr_err("length: %u != %u\n", *len, size);
2488                         ret = -EINVAL;
2489                         goto out;
2490                 }
2491                 ret = __ip_vs_get_dest_entries(get, user);
2492         }
2493         break;
2494
2495         case IP_VS_SO_GET_TIMEOUT:
2496         {
2497                 struct ip_vs_timeout_user t;
2498
2499                 __ip_vs_get_timeouts(&t);
2500                 if (copy_to_user(user, &t, sizeof(t)) != 0)
2501                         ret = -EFAULT;
2502         }
2503         break;
2504
2505         case IP_VS_SO_GET_DAEMON:
2506         {
2507                 struct ip_vs_daemon_user d[2];
2508
2509                 memset(&d, 0, sizeof(d));
2510                 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2511                         d[0].state = IP_VS_STATE_MASTER;
2512                         strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
2513                         d[0].syncid = ip_vs_master_syncid;
2514                 }
2515                 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2516                         d[1].state = IP_VS_STATE_BACKUP;
2517                         strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
2518                         d[1].syncid = ip_vs_backup_syncid;
2519                 }
2520                 if (copy_to_user(user, &d, sizeof(d)) != 0)
2521                         ret = -EFAULT;
2522         }
2523         break;
2524
2525         default:
2526                 ret = -EINVAL;
2527         }
2528
2529   out:
2530         mutex_unlock(&__ip_vs_mutex);
2531         return ret;
2532 }
2533
2534
2535 static struct nf_sockopt_ops ip_vs_sockopts = {
2536         .pf             = PF_INET,
2537         .set_optmin     = IP_VS_BASE_CTL,
2538         .set_optmax     = IP_VS_SO_SET_MAX+1,
2539         .set            = do_ip_vs_set_ctl,
2540         .get_optmin     = IP_VS_BASE_CTL,
2541         .get_optmax     = IP_VS_SO_GET_MAX+1,
2542         .get            = do_ip_vs_get_ctl,
2543         .owner          = THIS_MODULE,
2544 };
2545
2546 /*
2547  * Generic Netlink interface
2548  */
2549
2550 /* IPVS genetlink family */
2551 static struct genl_family ip_vs_genl_family = {
2552         .id             = GENL_ID_GENERATE,
2553         .hdrsize        = 0,
2554         .name           = IPVS_GENL_NAME,
2555         .version        = IPVS_GENL_VERSION,
2556         .maxattr        = IPVS_CMD_MAX,
2557 };
2558
2559 /* Policy used for first-level command attributes */
2560 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2561         [IPVS_CMD_ATTR_SERVICE]         = { .type = NLA_NESTED },
2562         [IPVS_CMD_ATTR_DEST]            = { .type = NLA_NESTED },
2563         [IPVS_CMD_ATTR_DAEMON]          = { .type = NLA_NESTED },
2564         [IPVS_CMD_ATTR_TIMEOUT_TCP]     = { .type = NLA_U32 },
2565         [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2566         [IPVS_CMD_ATTR_TIMEOUT_UDP]     = { .type = NLA_U32 },
2567 };
2568
2569 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2570 static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2571         [IPVS_DAEMON_ATTR_STATE]        = { .type = NLA_U32 },
2572         [IPVS_DAEMON_ATTR_MCAST_IFN]    = { .type = NLA_NUL_STRING,
2573                                             .len = IP_VS_IFNAME_MAXLEN },
2574         [IPVS_DAEMON_ATTR_SYNC_ID]      = { .type = NLA_U32 },
2575 };
2576
2577 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2578 static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2579         [IPVS_SVC_ATTR_AF]              = { .type = NLA_U16 },
2580         [IPVS_SVC_ATTR_PROTOCOL]        = { .type = NLA_U16 },
2581         [IPVS_SVC_ATTR_ADDR]            = { .type = NLA_BINARY,
2582                                             .len = sizeof(union nf_inet_addr) },
2583         [IPVS_SVC_ATTR_PORT]            = { .type = NLA_U16 },
2584         [IPVS_SVC_ATTR_FWMARK]          = { .type = NLA_U32 },
2585         [IPVS_SVC_ATTR_SCHED_NAME]      = { .type = NLA_NUL_STRING,
2586                                             .len = IP_VS_SCHEDNAME_MAXLEN },
2587         [IPVS_SVC_ATTR_PE_NAME]         = { .type = NLA_NUL_STRING,
2588                                             .len = IP_VS_PENAME_MAXLEN },
2589         [IPVS_SVC_ATTR_FLAGS]           = { .type = NLA_BINARY,
2590                                             .len = sizeof(struct ip_vs_flags) },
2591         [IPVS_SVC_ATTR_TIMEOUT]         = { .type = NLA_U32 },
2592         [IPVS_SVC_ATTR_NETMASK]         = { .type = NLA_U32 },
2593         [IPVS_SVC_ATTR_STATS]           = { .type = NLA_NESTED },
2594 };
2595
2596 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2597 static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2598         [IPVS_DEST_ATTR_ADDR]           = { .type = NLA_BINARY,
2599                                             .len = sizeof(union nf_inet_addr) },
2600         [IPVS_DEST_ATTR_PORT]           = { .type = NLA_U16 },
2601         [IPVS_DEST_ATTR_FWD_METHOD]     = { .type = NLA_U32 },
2602         [IPVS_DEST_ATTR_WEIGHT]         = { .type = NLA_U32 },
2603         [IPVS_DEST_ATTR_U_THRESH]       = { .type = NLA_U32 },
2604         [IPVS_DEST_ATTR_L_THRESH]       = { .type = NLA_U32 },
2605         [IPVS_DEST_ATTR_ACTIVE_CONNS]   = { .type = NLA_U32 },
2606         [IPVS_DEST_ATTR_INACT_CONNS]    = { .type = NLA_U32 },
2607         [IPVS_DEST_ATTR_PERSIST_CONNS]  = { .type = NLA_U32 },
2608         [IPVS_DEST_ATTR_STATS]          = { .type = NLA_NESTED },
2609 };
2610
2611 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2612                                  struct ip_vs_stats *stats)
2613 {
2614         struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2615         if (!nl_stats)
2616                 return -EMSGSIZE;
2617
2618         spin_lock_bh(&stats->lock);
2619
2620         NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
2621         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
2622         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
2623         NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
2624         NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
2625         NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
2626         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
2627         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
2628         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
2629         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
2630
2631         spin_unlock_bh(&stats->lock);
2632
2633         nla_nest_end(skb, nl_stats);
2634
2635         return 0;
2636
2637 nla_put_failure:
2638         spin_unlock_bh(&stats->lock);
2639         nla_nest_cancel(skb, nl_stats);
2640         return -EMSGSIZE;
2641 }
2642
2643 static int ip_vs_genl_fill_service(struct sk_buff *skb,
2644                                    struct ip_vs_service *svc)
2645 {
2646         struct nlattr *nl_service;
2647         struct ip_vs_flags flags = { .flags = svc->flags,
2648                                      .mask = ~0 };
2649
2650         nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2651         if (!nl_service)
2652                 return -EMSGSIZE;
2653
2654         NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
2655
2656         if (svc->fwmark) {
2657                 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2658         } else {
2659                 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2660                 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2661                 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2662         }
2663
2664         NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2665         if (svc->pe)
2666                 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
2667         NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2668         NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2669         NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2670
2671         if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2672                 goto nla_put_failure;
2673
2674         nla_nest_end(skb, nl_service);
2675
2676         return 0;
2677
2678 nla_put_failure:
2679         nla_nest_cancel(skb, nl_service);
2680         return -EMSGSIZE;
2681 }
2682
2683 static int ip_vs_genl_dump_service(struct sk_buff *skb,
2684                                    struct ip_vs_service *svc,
2685                                    struct netlink_callback *cb)
2686 {
2687         void *hdr;
2688
2689         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2690                           &ip_vs_genl_family, NLM_F_MULTI,
2691                           IPVS_CMD_NEW_SERVICE);
2692         if (!hdr)
2693                 return -EMSGSIZE;
2694
2695         if (ip_vs_genl_fill_service(skb, svc) < 0)
2696                 goto nla_put_failure;
2697
2698         return genlmsg_end(skb, hdr);
2699
2700 nla_put_failure:
2701         genlmsg_cancel(skb, hdr);
2702         return -EMSGSIZE;
2703 }
2704
2705 static int ip_vs_genl_dump_services(struct sk_buff *skb,
2706                                     struct netlink_callback *cb)
2707 {
2708         int idx = 0, i;
2709         int start = cb->args[0];
2710         struct ip_vs_service *svc;
2711
2712         mutex_lock(&__ip_vs_mutex);
2713         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2714                 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2715                         if (++idx <= start)
2716                                 continue;
2717                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2718                                 idx--;
2719                                 goto nla_put_failure;
2720                         }
2721                 }
2722         }
2723
2724         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2725                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2726                         if (++idx <= start)
2727                                 continue;
2728                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2729                                 idx--;
2730                                 goto nla_put_failure;
2731                         }
2732                 }
2733         }
2734
2735 nla_put_failure:
2736         mutex_unlock(&__ip_vs_mutex);
2737         cb->args[0] = idx;
2738
2739         return skb->len;
2740 }
2741
2742 static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
2743                                     struct nlattr *nla, int full_entry,
2744                                     struct ip_vs_service **ret_svc)
2745 {
2746         struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2747         struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2748         struct ip_vs_service *svc;
2749
2750         /* Parse mandatory identifying service fields first */
2751         if (nla == NULL ||
2752             nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2753                 return -EINVAL;
2754
2755         nla_af          = attrs[IPVS_SVC_ATTR_AF];
2756         nla_protocol    = attrs[IPVS_SVC_ATTR_PROTOCOL];
2757         nla_addr        = attrs[IPVS_SVC_ATTR_ADDR];
2758         nla_port        = attrs[IPVS_SVC_ATTR_PORT];
2759         nla_fwmark      = attrs[IPVS_SVC_ATTR_FWMARK];
2760
2761         if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2762                 return -EINVAL;
2763
2764         memset(usvc, 0, sizeof(*usvc));
2765
2766         usvc->af = nla_get_u16(nla_af);
2767 #ifdef CONFIG_IP_VS_IPV6
2768         if (usvc->af != AF_INET && usvc->af != AF_INET6)
2769 #else
2770         if (usvc->af != AF_INET)
2771 #endif
2772                 return -EAFNOSUPPORT;
2773
2774         if (nla_fwmark) {
2775                 usvc->protocol = IPPROTO_TCP;
2776                 usvc->fwmark = nla_get_u32(nla_fwmark);
2777         } else {
2778                 usvc->protocol = nla_get_u16(nla_protocol);
2779                 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2780                 usvc->port = nla_get_u16(nla_port);
2781                 usvc->fwmark = 0;
2782         }
2783
2784         if (usvc->fwmark)
2785                 svc = __ip_vs_svc_fwm_find(usvc->af, usvc->fwmark);
2786         else
2787                 svc = __ip_vs_service_find(usvc->af, usvc->protocol,
2788                                            &usvc->addr, usvc->port);
2789         *ret_svc = svc;
2790
2791         /* If a full entry was requested, check for the additional fields */
2792         if (full_entry) {
2793                 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
2794                               *nla_netmask;
2795                 struct ip_vs_flags flags;
2796
2797                 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2798                 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
2799                 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2800                 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2801                 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2802
2803                 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2804                         return -EINVAL;
2805
2806                 nla_memcpy(&flags, nla_flags, sizeof(flags));
2807
2808                 /* prefill flags from service if it already exists */
2809                 if (svc)
2810                         usvc->flags = svc->flags;
2811
2812                 /* set new flags from userland */
2813                 usvc->flags = (usvc->flags & ~flags.mask) |
2814                               (flags.flags & flags.mask);
2815                 usvc->sched_name = nla_data(nla_sched);
2816                 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
2817                 usvc->timeout = nla_get_u32(nla_timeout);
2818                 usvc->netmask = nla_get_u32(nla_netmask);
2819         }
2820
2821         return 0;
2822 }
2823
2824 static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2825 {
2826         struct ip_vs_service_user_kern usvc;
2827         struct ip_vs_service *svc;
2828         int ret;
2829
2830         ret = ip_vs_genl_parse_service(&usvc, nla, 0, &svc);
2831         return ret ? ERR_PTR(ret) : svc;
2832 }
2833
2834 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2835 {
2836         struct nlattr *nl_dest;
2837
2838         nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2839         if (!nl_dest)
2840                 return -EMSGSIZE;
2841
2842         NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2843         NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2844
2845         NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2846                     atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2847         NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2848         NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2849         NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2850         NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2851                     atomic_read(&dest->activeconns));
2852         NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2853                     atomic_read(&dest->inactconns));
2854         NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2855                     atomic_read(&dest->persistconns));
2856
2857         if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2858                 goto nla_put_failure;
2859
2860         nla_nest_end(skb, nl_dest);
2861
2862         return 0;
2863
2864 nla_put_failure:
2865         nla_nest_cancel(skb, nl_dest);
2866         return -EMSGSIZE;
2867 }
2868
2869 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2870                                 struct netlink_callback *cb)
2871 {
2872         void *hdr;
2873
2874         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2875                           &ip_vs_genl_family, NLM_F_MULTI,
2876                           IPVS_CMD_NEW_DEST);
2877         if (!hdr)
2878                 return -EMSGSIZE;
2879
2880         if (ip_vs_genl_fill_dest(skb, dest) < 0)
2881                 goto nla_put_failure;
2882
2883         return genlmsg_end(skb, hdr);
2884
2885 nla_put_failure:
2886         genlmsg_cancel(skb, hdr);
2887         return -EMSGSIZE;
2888 }
2889
2890 static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2891                                  struct netlink_callback *cb)
2892 {
2893         int idx = 0;
2894         int start = cb->args[0];
2895         struct ip_vs_service *svc;
2896         struct ip_vs_dest *dest;
2897         struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2898
2899         mutex_lock(&__ip_vs_mutex);
2900
2901         /* Try to find the service for which to dump destinations */
2902         if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2903                         IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2904                 goto out_err;
2905
2906         svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
2907         if (IS_ERR(svc) || svc == NULL)
2908                 goto out_err;
2909
2910         /* Dump the destinations */
2911         list_for_each_entry(dest, &svc->destinations, n_list) {
2912                 if (++idx <= start)
2913                         continue;
2914                 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2915                         idx--;
2916                         goto nla_put_failure;
2917                 }
2918         }
2919
2920 nla_put_failure:
2921         cb->args[0] = idx;
2922
2923 out_err:
2924         mutex_unlock(&__ip_vs_mutex);
2925
2926         return skb->len;
2927 }
2928
2929 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
2930                                  struct nlattr *nla, int full_entry)
2931 {
2932         struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2933         struct nlattr *nla_addr, *nla_port;
2934
2935         /* Parse mandatory identifying destination fields first */
2936         if (nla == NULL ||
2937             nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2938                 return -EINVAL;
2939
2940         nla_addr        = attrs[IPVS_DEST_ATTR_ADDR];
2941         nla_port        = attrs[IPVS_DEST_ATTR_PORT];
2942
2943         if (!(nla_addr && nla_port))
2944                 return -EINVAL;
2945
2946         memset(udest, 0, sizeof(*udest));
2947
2948         nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2949         udest->port = nla_get_u16(nla_port);
2950
2951         /* If a full entry was requested, check for the additional fields */
2952         if (full_entry) {
2953                 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2954                               *nla_l_thresh;
2955
2956                 nla_fwd         = attrs[IPVS_DEST_ATTR_FWD_METHOD];
2957                 nla_weight      = attrs[IPVS_DEST_ATTR_WEIGHT];
2958                 nla_u_thresh    = attrs[IPVS_DEST_ATTR_U_THRESH];
2959                 nla_l_thresh    = attrs[IPVS_DEST_ATTR_L_THRESH];
2960
2961                 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
2962                         return -EINVAL;
2963
2964                 udest->conn_flags = nla_get_u32(nla_fwd)
2965                                     & IP_VS_CONN_F_FWD_MASK;
2966                 udest->weight = nla_get_u32(nla_weight);
2967                 udest->u_threshold = nla_get_u32(nla_u_thresh);
2968                 udest->l_threshold = nla_get_u32(nla_l_thresh);
2969         }
2970
2971         return 0;
2972 }
2973
2974 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
2975                                   const char *mcast_ifn, __be32 syncid)
2976 {
2977         struct nlattr *nl_daemon;
2978
2979         nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
2980         if (!nl_daemon)
2981                 return -EMSGSIZE;
2982
2983         NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
2984         NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
2985         NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
2986
2987         nla_nest_end(skb, nl_daemon);
2988
2989         return 0;
2990
2991 nla_put_failure:
2992         nla_nest_cancel(skb, nl_daemon);
2993         return -EMSGSIZE;
2994 }
2995
2996 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
2997                                   const char *mcast_ifn, __be32 syncid,
2998                                   struct netlink_callback *cb)
2999 {
3000         void *hdr;
3001         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3002                           &ip_vs_genl_family, NLM_F_MULTI,
3003                           IPVS_CMD_NEW_DAEMON);
3004         if (!hdr)
3005                 return -EMSGSIZE;
3006
3007         if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3008                 goto nla_put_failure;
3009
3010         return genlmsg_end(skb, hdr);
3011
3012 nla_put_failure:
3013         genlmsg_cancel(skb, hdr);
3014         return -EMSGSIZE;
3015 }
3016
3017 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3018                                    struct netlink_callback *cb)
3019 {
3020         mutex_lock(&__ip_vs_mutex);
3021         if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
3022                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
3023                                            ip_vs_master_mcast_ifn,
3024                                            ip_vs_master_syncid, cb) < 0)
3025                         goto nla_put_failure;
3026
3027                 cb->args[0] = 1;
3028         }
3029
3030         if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3031                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3032                                            ip_vs_backup_mcast_ifn,
3033                                            ip_vs_backup_syncid, cb) < 0)
3034                         goto nla_put_failure;
3035
3036                 cb->args[1] = 1;
3037         }
3038
3039 nla_put_failure:
3040         mutex_unlock(&__ip_vs_mutex);
3041
3042         return skb->len;
3043 }
3044
3045 static int ip_vs_genl_new_daemon(struct nlattr **attrs)
3046 {
3047         if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3048               attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3049               attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3050                 return -EINVAL;
3051
3052         return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3053                                  nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3054                                  nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3055 }
3056
3057 static int ip_vs_genl_del_daemon(struct nlattr **attrs)
3058 {
3059         if (!attrs[IPVS_DAEMON_ATTR_STATE])
3060                 return -EINVAL;
3061
3062         return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3063 }
3064
3065 static int ip_vs_genl_set_config(struct nlattr **attrs)
3066 {
3067         struct ip_vs_timeout_user t;
3068
3069         __ip_vs_get_timeouts(&t);
3070
3071         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3072                 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3073
3074         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3075                 t.tcp_fin_timeout =
3076                         nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3077
3078         if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3079                 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3080
3081         return ip_vs_set_timeout(&t);
3082 }
3083
3084 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3085 {
3086         struct ip_vs_service *svc = NULL;
3087         struct ip_vs_service_user_kern usvc;
3088         struct ip_vs_dest_user_kern udest;
3089         int ret = 0, cmd;
3090         int need_full_svc = 0, need_full_dest = 0;
3091
3092         cmd = info->genlhdr->cmd;
3093
3094         mutex_lock(&__ip_vs_mutex);
3095
3096         if (cmd == IPVS_CMD_FLUSH) {
3097                 ret = ip_vs_flush();
3098                 goto out;
3099         } else if (cmd == IPVS_CMD_SET_CONFIG) {
3100                 ret = ip_vs_genl_set_config(info->attrs);
3101                 goto out;
3102         } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3103                    cmd == IPVS_CMD_DEL_DAEMON) {
3104
3105                 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3106
3107                 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3108                     nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3109                                      info->attrs[IPVS_CMD_ATTR_DAEMON],
3110                                      ip_vs_daemon_policy)) {
3111                         ret = -EINVAL;
3112                         goto out;
3113                 }
3114
3115                 if (cmd == IPVS_CMD_NEW_DAEMON)
3116                         ret = ip_vs_genl_new_daemon(daemon_attrs);
3117                 else
3118                         ret = ip_vs_genl_del_daemon(daemon_attrs);
3119                 goto out;
3120         } else if (cmd == IPVS_CMD_ZERO &&
3121                    !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3122                 ret = ip_vs_zero_all();
3123                 goto out;
3124         }
3125
3126         /* All following commands require a service argument, so check if we
3127          * received a valid one. We need a full service specification when
3128          * adding / editing a service. Only identifying members otherwise. */
3129         if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3130                 need_full_svc = 1;
3131
3132         ret = ip_vs_genl_parse_service(&usvc,
3133                                        info->attrs[IPVS_CMD_ATTR_SERVICE],
3134                                        need_full_svc, &svc);
3135         if (ret)
3136                 goto out;
3137
3138         /* Unless we're adding a new service, the service must already exist */
3139         if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3140                 ret = -ESRCH;
3141                 goto out;
3142         }
3143
3144         /* Destination commands require a valid destination argument. For
3145          * adding / editing a destination, we need a full destination
3146          * specification. */
3147         if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3148             cmd == IPVS_CMD_DEL_DEST) {
3149                 if (cmd != IPVS_CMD_DEL_DEST)
3150                         need_full_dest = 1;
3151
3152                 ret = ip_vs_genl_parse_dest(&udest,
3153                                             info->attrs[IPVS_CMD_ATTR_DEST],
3154                                             need_full_dest);
3155                 if (ret)
3156                         goto out;
3157         }
3158
3159         switch (cmd) {
3160         case IPVS_CMD_NEW_SERVICE:
3161                 if (svc == NULL)
3162                         ret = ip_vs_add_service(&usvc, &svc);
3163                 else
3164                         ret = -EEXIST;
3165                 break;
3166         case IPVS_CMD_SET_SERVICE:
3167                 ret = ip_vs_edit_service(svc, &usvc);
3168                 break;
3169         case IPVS_CMD_DEL_SERVICE:
3170                 ret = ip_vs_del_service(svc);
3171                 /* do not use svc, it can be freed */
3172                 break;
3173         case IPVS_CMD_NEW_DEST:
3174                 ret = ip_vs_add_dest(svc, &udest);
3175                 break;
3176         case IPVS_CMD_SET_DEST:
3177                 ret = ip_vs_edit_dest(svc, &udest);
3178                 break;
3179         case IPVS_CMD_DEL_DEST:
3180                 ret = ip_vs_del_dest(svc, &udest);
3181                 break;
3182         case IPVS_CMD_ZERO:
3183                 ret = ip_vs_zero_service(svc);
3184                 break;
3185         default:
3186                 ret = -EINVAL;
3187         }
3188
3189 out:
3190         mutex_unlock(&__ip_vs_mutex);
3191
3192         return ret;
3193 }
3194
3195 static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3196 {
3197         struct sk_buff *msg;
3198         void *reply;
3199         int ret, cmd, reply_cmd;
3200
3201         cmd = info->genlhdr->cmd;
3202
3203         if (cmd == IPVS_CMD_GET_SERVICE)
3204                 reply_cmd = IPVS_CMD_NEW_SERVICE;
3205         else if (cmd == IPVS_CMD_GET_INFO)
3206                 reply_cmd = IPVS_CMD_SET_INFO;
3207         else if (cmd == IPVS_CMD_GET_CONFIG)
3208                 reply_cmd = IPVS_CMD_SET_CONFIG;
3209         else {
3210                 pr_err("unknown Generic Netlink command\n");
3211                 return -EINVAL;
3212         }
3213
3214         msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3215         if (!msg)
3216                 return -ENOMEM;
3217
3218         mutex_lock(&__ip_vs_mutex);
3219
3220         reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3221         if (reply == NULL)
3222                 goto nla_put_failure;
3223
3224         switch (cmd) {
3225         case IPVS_CMD_GET_SERVICE:
3226         {
3227                 struct ip_vs_service *svc;
3228
3229                 svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
3230                 if (IS_ERR(svc)) {
3231                         ret = PTR_ERR(svc);
3232                         goto out_err;
3233                 } else if (svc) {
3234                         ret = ip_vs_genl_fill_service(msg, svc);
3235                         if (ret)
3236                                 goto nla_put_failure;
3237                 } else {
3238                         ret = -ESRCH;
3239                         goto out_err;
3240                 }
3241
3242                 break;
3243         }
3244
3245         case IPVS_CMD_GET_CONFIG:
3246         {
3247                 struct ip_vs_timeout_user t;
3248
3249                 __ip_vs_get_timeouts(&t);
3250 #ifdef CONFIG_IP_VS_PROTO_TCP
3251                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3252                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3253                             t.tcp_fin_timeout);
3254 #endif
3255 #ifdef CONFIG_IP_VS_PROTO_UDP
3256                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3257 #endif
3258
3259                 break;
3260         }
3261
3262         case IPVS_CMD_GET_INFO:
3263                 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3264                 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3265                             ip_vs_conn_tab_size);
3266                 break;
3267         }
3268
3269         genlmsg_end(msg, reply);
3270         ret = genlmsg_reply(msg, info);
3271         goto out;
3272
3273 nla_put_failure:
3274         pr_err("not enough space in Netlink message\n");
3275         ret = -EMSGSIZE;
3276
3277 out_err:
3278         nlmsg_free(msg);
3279 out:
3280         mutex_unlock(&__ip_vs_mutex);
3281
3282         return ret;
3283 }
3284
3285
3286 static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3287         {
3288                 .cmd    = IPVS_CMD_NEW_SERVICE,
3289                 .flags  = GENL_ADMIN_PERM,
3290                 .policy = ip_vs_cmd_policy,
3291                 .doit   = ip_vs_genl_set_cmd,
3292         },
3293         {
3294                 .cmd    = IPVS_CMD_SET_SERVICE,
3295                 .flags  = GENL_ADMIN_PERM,
3296                 .policy = ip_vs_cmd_policy,
3297                 .doit   = ip_vs_genl_set_cmd,
3298         },
3299         {
3300                 .cmd    = IPVS_CMD_DEL_SERVICE,
3301                 .flags  = GENL_ADMIN_PERM,
3302                 .policy = ip_vs_cmd_policy,
3303                 .doit   = ip_vs_genl_set_cmd,
3304         },
3305         {
3306                 .cmd    = IPVS_CMD_GET_SERVICE,
3307                 .flags  = GENL_ADMIN_PERM,
3308                 .doit   = ip_vs_genl_get_cmd,
3309                 .dumpit = ip_vs_genl_dump_services,
3310                 .policy = ip_vs_cmd_policy,
3311         },
3312         {
3313                 .cmd    = IPVS_CMD_NEW_DEST,
3314                 .flags  = GENL_ADMIN_PERM,
3315                 .policy = ip_vs_cmd_policy,
3316                 .doit   = ip_vs_genl_set_cmd,
3317         },
3318         {
3319                 .cmd    = IPVS_CMD_SET_DEST,
3320                 .flags  = GENL_ADMIN_PERM,
3321                 .policy = ip_vs_cmd_policy,
3322                 .doit   = ip_vs_genl_set_cmd,
3323         },
3324         {
3325                 .cmd    = IPVS_CMD_DEL_DEST,
3326                 .flags  = GENL_ADMIN_PERM,
3327                 .policy = ip_vs_cmd_policy,
3328                 .doit   = ip_vs_genl_set_cmd,
3329         },
3330         {
3331                 .cmd    = IPVS_CMD_GET_DEST,
3332                 .flags  = GENL_ADMIN_PERM,
3333                 .policy = ip_vs_cmd_policy,
3334                 .dumpit = ip_vs_genl_dump_dests,
3335         },
3336         {
3337                 .cmd    = IPVS_CMD_NEW_DAEMON,
3338                 .flags  = GENL_ADMIN_PERM,
3339                 .policy = ip_vs_cmd_policy,
3340                 .doit   = ip_vs_genl_set_cmd,
3341         },
3342         {
3343                 .cmd    = IPVS_CMD_DEL_DAEMON,
3344                 .flags  = GENL_ADMIN_PERM,
3345                 .policy = ip_vs_cmd_policy,
3346                 .doit   = ip_vs_genl_set_cmd,
3347         },
3348         {
3349                 .cmd    = IPVS_CMD_GET_DAEMON,
3350                 .flags  = GENL_ADMIN_PERM,
3351                 .dumpit = ip_vs_genl_dump_daemons,
3352         },
3353         {
3354                 .cmd    = IPVS_CMD_SET_CONFIG,
3355                 .flags  = GENL_ADMIN_PERM,
3356                 .policy = ip_vs_cmd_policy,
3357                 .doit   = ip_vs_genl_set_cmd,
3358         },
3359         {
3360                 .cmd    = IPVS_CMD_GET_CONFIG,
3361                 .flags  = GENL_ADMIN_PERM,
3362                 .doit   = ip_vs_genl_get_cmd,
3363         },
3364         {
3365                 .cmd    = IPVS_CMD_GET_INFO,
3366                 .flags  = GENL_ADMIN_PERM,
3367                 .doit   = ip_vs_genl_get_cmd,
3368         },
3369         {
3370                 .cmd    = IPVS_CMD_ZERO,
3371                 .flags  = GENL_ADMIN_PERM,
3372                 .policy = ip_vs_cmd_policy,
3373                 .doit   = ip_vs_genl_set_cmd,
3374         },
3375         {
3376                 .cmd    = IPVS_CMD_FLUSH,
3377                 .flags  = GENL_ADMIN_PERM,
3378                 .doit   = ip_vs_genl_set_cmd,
3379         },
3380 };
3381
3382 static int __init ip_vs_genl_register(void)
3383 {
3384         return genl_register_family_with_ops(&ip_vs_genl_family,
3385                 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
3386 }
3387
3388 static void ip_vs_genl_unregister(void)
3389 {
3390         genl_unregister_family(&ip_vs_genl_family);
3391 }
3392
3393 /* End of Generic Netlink interface definitions */
3394
3395
3396 int __init ip_vs_control_init(void)
3397 {
3398         int ret;
3399         int idx;
3400
3401         EnterFunction(2);
3402
3403         /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3404         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
3405                 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3406                 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3407         }
3408         for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++)  {
3409                 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3410         }
3411         smp_wmb();
3412
3413         ret = nf_register_sockopt(&ip_vs_sockopts);
3414         if (ret) {
3415                 pr_err("cannot register sockopt.\n");
3416                 return ret;
3417         }
3418
3419         ret = ip_vs_genl_register();
3420         if (ret) {
3421                 pr_err("cannot register Generic Netlink interface.\n");
3422                 nf_unregister_sockopt(&ip_vs_sockopts);
3423                 return ret;
3424         }
3425
3426         proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
3427         proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
3428
3429         sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
3430
3431         ip_vs_new_estimator(&ip_vs_stats);
3432
3433         /* Hook the defense timer */
3434         schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3435
3436         LeaveFunction(2);
3437         return 0;
3438 }
3439
3440
/*
 * Tear down the IPVS control interfaces.
 *
 * The trash is cleaned up and the defense work is cancelled first; then
 * the global estimator, the sysctl table, the proc entries, the Generic
 * Netlink family and the sockopt interface are removed.
 */
3441 void ip_vs_control_cleanup(void)
3442 {
3443         EnterFunction(2);
3444         ip_vs_trash_cleanup();
/* make sure the defense work is neither pending nor running any more */
3445         cancel_rearming_delayed_work(&defense_work);
3446         cancel_work_sync(&defense_work.work);
3447         ip_vs_kill_estimator(&ip_vs_stats);
3448         unregister_sysctl_table(sysctl_header);
3449         proc_net_remove(&init_net, "ip_vs_stats");
3450         proc_net_remove(&init_net, "ip_vs");
3451         ip_vs_genl_unregister();
3452         nf_unregister_sockopt(&ip_vs_sockopts);
3453         LeaveFunction(2);
3454 }