diff options
-rw-r--r-- | net/ipv4/ipvs/ip_vs_sync.c | 431 |
1 files changed, 172 insertions, 259 deletions
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c index 2d4a86f73325..45e9bd96c286 100644 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/ipv4/ipvs/ip_vs_sync.c | |||
@@ -27,10 +27,12 @@ | |||
27 | #include <linux/in.h> | 27 | #include <linux/in.h> |
28 | #include <linux/igmp.h> /* for ip_mc_join_group */ | 28 | #include <linux/igmp.h> /* for ip_mc_join_group */ |
29 | #include <linux/udp.h> | 29 | #include <linux/udp.h> |
30 | #include <linux/err.h> | ||
31 | #include <linux/kthread.h> | ||
32 | #include <linux/wait.h> | ||
30 | 33 | ||
31 | #include <net/ip.h> | 34 | #include <net/ip.h> |
32 | #include <net/sock.h> | 35 | #include <net/sock.h> |
33 | #include <asm/uaccess.h> /* for get_fs and set_fs */ | ||
34 | 36 | ||
35 | #include <net/ip_vs.h> | 37 | #include <net/ip_vs.h> |
36 | 38 | ||
@@ -66,8 +68,8 @@ struct ip_vs_sync_conn_options { | |||
66 | }; | 68 | }; |
67 | 69 | ||
68 | struct ip_vs_sync_thread_data { | 70 | struct ip_vs_sync_thread_data { |
69 | struct completion *startup; | 71 | struct socket *sock; |
70 | int state; | 72 | char *buf; |
71 | }; | 73 | }; |
72 | 74 | ||
73 | #define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn)) | 75 | #define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn)) |
@@ -138,18 +140,19 @@ volatile int ip_vs_backup_syncid = 0; | |||
138 | char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN]; | 140 | char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN]; |
139 | char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN]; | 141 | char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN]; |
140 | 142 | ||
141 | /* multicast addr */ | 143 | /* sync daemon tasks */ |
142 | static struct sockaddr_in mcast_addr; | 144 | static struct task_struct *sync_master_thread; |
145 | static struct task_struct *sync_backup_thread; | ||
143 | 146 | ||
147 | /* multicast addr */ | ||
148 | static struct sockaddr_in mcast_addr = { | ||
149 | .sin_family = AF_INET, | ||
150 | .sin_port = __constant_htons(IP_VS_SYNC_PORT), | ||
151 | .sin_addr.s_addr = __constant_htonl(IP_VS_SYNC_GROUP), | ||
152 | }; | ||
144 | 153 | ||
145 | static inline void sb_queue_tail(struct ip_vs_sync_buff *sb) | ||
146 | { | ||
147 | spin_lock(&ip_vs_sync_lock); | ||
148 | list_add_tail(&sb->list, &ip_vs_sync_queue); | ||
149 | spin_unlock(&ip_vs_sync_lock); | ||
150 | } | ||
151 | 154 | ||
152 | static inline struct ip_vs_sync_buff * sb_dequeue(void) | 155 | static inline struct ip_vs_sync_buff *sb_dequeue(void) |
153 | { | 156 | { |
154 | struct ip_vs_sync_buff *sb; | 157 | struct ip_vs_sync_buff *sb; |
155 | 158 | ||
@@ -193,6 +196,16 @@ static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb) | |||
193 | kfree(sb); | 196 | kfree(sb); |
194 | } | 197 | } |
195 | 198 | ||
199 | static inline void sb_queue_tail(struct ip_vs_sync_buff *sb) | ||
200 | { | ||
201 | spin_lock(&ip_vs_sync_lock); | ||
202 | if (ip_vs_sync_state & IP_VS_STATE_MASTER) | ||
203 | list_add_tail(&sb->list, &ip_vs_sync_queue); | ||
204 | else | ||
205 | ip_vs_sync_buff_release(sb); | ||
206 | spin_unlock(&ip_vs_sync_lock); | ||
207 | } | ||
208 | |||
196 | /* | 209 | /* |
197 | * Get the current sync buffer if it has been created for more | 210 | * Get the current sync buffer if it has been created for more |
198 | * than the specified time or the specified time is zero. | 211 | * than the specified time or the specified time is zero. |
@@ -572,14 +585,17 @@ static int bind_mcastif_addr(struct socket *sock, char *ifname) | |||
572 | static struct socket * make_send_sock(void) | 585 | static struct socket * make_send_sock(void) |
573 | { | 586 | { |
574 | struct socket *sock; | 587 | struct socket *sock; |
588 | int result; | ||
575 | 589 | ||
576 | /* First create a socket */ | 590 | /* First create a socket */ |
577 | if (sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock) < 0) { | 591 | result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); |
592 | if (result < 0) { | ||
578 | IP_VS_ERR("Error during creation of socket; terminating\n"); | 593 | IP_VS_ERR("Error during creation of socket; terminating\n"); |
579 | return NULL; | 594 | return ERR_PTR(result); |
580 | } | 595 | } |
581 | 596 | ||
582 | if (set_mcast_if(sock->sk, ip_vs_master_mcast_ifn) < 0) { | 597 | result = set_mcast_if(sock->sk, ip_vs_master_mcast_ifn); |
598 | if (result < 0) { | ||
583 | IP_VS_ERR("Error setting outbound mcast interface\n"); | 599 | IP_VS_ERR("Error setting outbound mcast interface\n"); |
584 | goto error; | 600 | goto error; |
585 | } | 601 | } |
@@ -587,14 +603,15 @@ static struct socket * make_send_sock(void) | |||
587 | set_mcast_loop(sock->sk, 0); | 603 | set_mcast_loop(sock->sk, 0); |
588 | set_mcast_ttl(sock->sk, 1); | 604 | set_mcast_ttl(sock->sk, 1); |
589 | 605 | ||
590 | if (bind_mcastif_addr(sock, ip_vs_master_mcast_ifn) < 0) { | 606 | result = bind_mcastif_addr(sock, ip_vs_master_mcast_ifn); |
607 | if (result < 0) { | ||
591 | IP_VS_ERR("Error binding address of the mcast interface\n"); | 608 | IP_VS_ERR("Error binding address of the mcast interface\n"); |
592 | goto error; | 609 | goto error; |
593 | } | 610 | } |
594 | 611 | ||
595 | if (sock->ops->connect(sock, | 612 | result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr, |
596 | (struct sockaddr*)&mcast_addr, | 613 | sizeof(struct sockaddr), 0); |
597 | sizeof(struct sockaddr), 0) < 0) { | 614 | if (result < 0) { |
598 | IP_VS_ERR("Error connecting to the multicast addr\n"); | 615 | IP_VS_ERR("Error connecting to the multicast addr\n"); |
599 | goto error; | 616 | goto error; |
600 | } | 617 | } |
@@ -603,7 +620,7 @@ static struct socket * make_send_sock(void) | |||
603 | 620 | ||
604 | error: | 621 | error: |
605 | sock_release(sock); | 622 | sock_release(sock); |
606 | return NULL; | 623 | return ERR_PTR(result); |
607 | } | 624 | } |
608 | 625 | ||
609 | 626 | ||
@@ -613,27 +630,30 @@ static struct socket * make_send_sock(void) | |||
613 | static struct socket * make_receive_sock(void) | 630 | static struct socket * make_receive_sock(void) |
614 | { | 631 | { |
615 | struct socket *sock; | 632 | struct socket *sock; |
633 | int result; | ||
616 | 634 | ||
617 | /* First create a socket */ | 635 | /* First create a socket */ |
618 | if (sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock) < 0) { | 636 | result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); |
637 | if (result < 0) { | ||
619 | IP_VS_ERR("Error during creation of socket; terminating\n"); | 638 | IP_VS_ERR("Error during creation of socket; terminating\n"); |
620 | return NULL; | 639 | return ERR_PTR(result); |
621 | } | 640 | } |
622 | 641 | ||
623 | /* it is equivalent to the REUSEADDR option in user-space */ | 642 | /* it is equivalent to the REUSEADDR option in user-space */ |
624 | sock->sk->sk_reuse = 1; | 643 | sock->sk->sk_reuse = 1; |
625 | 644 | ||
626 | if (sock->ops->bind(sock, | 645 | result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr, |
627 | (struct sockaddr*)&mcast_addr, | 646 | sizeof(struct sockaddr)); |
628 | sizeof(struct sockaddr)) < 0) { | 647 | if (result < 0) { |
629 | IP_VS_ERR("Error binding to the multicast addr\n"); | 648 | IP_VS_ERR("Error binding to the multicast addr\n"); |
630 | goto error; | 649 | goto error; |
631 | } | 650 | } |
632 | 651 | ||
633 | /* join the multicast group */ | 652 | /* join the multicast group */ |
634 | if (join_mcast_group(sock->sk, | 653 | result = join_mcast_group(sock->sk, |
635 | (struct in_addr*)&mcast_addr.sin_addr, | 654 | (struct in_addr *) &mcast_addr.sin_addr, |
636 | ip_vs_backup_mcast_ifn) < 0) { | 655 | ip_vs_backup_mcast_ifn); |
656 | if (result < 0) { | ||
637 | IP_VS_ERR("Error joining to the multicast group\n"); | 657 | IP_VS_ERR("Error joining to the multicast group\n"); |
638 | goto error; | 658 | goto error; |
639 | } | 659 | } |
@@ -642,7 +662,7 @@ static struct socket * make_receive_sock(void) | |||
642 | 662 | ||
643 | error: | 663 | error: |
644 | sock_release(sock); | 664 | sock_release(sock); |
645 | return NULL; | 665 | return ERR_PTR(result); |
646 | } | 666 | } |
647 | 667 | ||
648 | 668 | ||
@@ -700,44 +720,29 @@ ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen) | |||
700 | } | 720 | } |
701 | 721 | ||
702 | 722 | ||
703 | static DECLARE_WAIT_QUEUE_HEAD(sync_wait); | 723 | static int sync_thread_master(void *data) |
704 | static pid_t sync_master_pid = 0; | ||
705 | static pid_t sync_backup_pid = 0; | ||
706 | |||
707 | static DECLARE_WAIT_QUEUE_HEAD(stop_sync_wait); | ||
708 | static int stop_master_sync = 0; | ||
709 | static int stop_backup_sync = 0; | ||
710 | |||
711 | static void sync_master_loop(void) | ||
712 | { | 724 | { |
713 | struct socket *sock; | 725 | struct ip_vs_sync_thread_data *tinfo = data; |
714 | struct ip_vs_sync_buff *sb; | 726 | struct ip_vs_sync_buff *sb; |
715 | 727 | ||
716 | /* create the sending multicast socket */ | ||
717 | sock = make_send_sock(); | ||
718 | if (!sock) | ||
719 | return; | ||
720 | |||
721 | IP_VS_INFO("sync thread started: state = MASTER, mcast_ifn = %s, " | 728 | IP_VS_INFO("sync thread started: state = MASTER, mcast_ifn = %s, " |
722 | "syncid = %d\n", | 729 | "syncid = %d\n", |
723 | ip_vs_master_mcast_ifn, ip_vs_master_syncid); | 730 | ip_vs_master_mcast_ifn, ip_vs_master_syncid); |
724 | 731 | ||
725 | for (;;) { | 732 | while (!kthread_should_stop()) { |
726 | while ((sb=sb_dequeue())) { | 733 | while ((sb = sb_dequeue())) { |
727 | ip_vs_send_sync_msg(sock, sb->mesg); | 734 | ip_vs_send_sync_msg(tinfo->sock, sb->mesg); |
728 | ip_vs_sync_buff_release(sb); | 735 | ip_vs_sync_buff_release(sb); |
729 | } | 736 | } |
730 | 737 | ||
731 | /* check if entries stay in curr_sb for 2 seconds */ | 738 | /* check if entries stay in curr_sb for 2 seconds */ |
732 | if ((sb = get_curr_sync_buff(2*HZ))) { | 739 | sb = get_curr_sync_buff(2 * HZ); |
733 | ip_vs_send_sync_msg(sock, sb->mesg); | 740 | if (sb) { |
741 | ip_vs_send_sync_msg(tinfo->sock, sb->mesg); | ||
734 | ip_vs_sync_buff_release(sb); | 742 | ip_vs_sync_buff_release(sb); |
735 | } | 743 | } |
736 | 744 | ||
737 | if (stop_master_sync) | 745 | schedule_timeout_interruptible(HZ); |
738 | break; | ||
739 | |||
740 | msleep_interruptible(1000); | ||
741 | } | 746 | } |
742 | 747 | ||
743 | /* clean up the sync_buff queue */ | 748 | /* clean up the sync_buff queue */ |
@@ -751,267 +756,175 @@ static void sync_master_loop(void) | |||
751 | } | 756 | } |
752 | 757 | ||
753 | /* release the sending multicast socket */ | 758 | /* release the sending multicast socket */ |
754 | sock_release(sock); | 759 | sock_release(tinfo->sock); |
760 | kfree(tinfo); | ||
761 | |||
762 | return 0; | ||
755 | } | 763 | } |
756 | 764 | ||
757 | 765 | ||
758 | static void sync_backup_loop(void) | 766 | static int sync_thread_backup(void *data) |
759 | { | 767 | { |
760 | struct socket *sock; | 768 | struct ip_vs_sync_thread_data *tinfo = data; |
761 | char *buf; | ||
762 | int len; | 769 | int len; |
763 | 770 | ||
764 | if (!(buf = kmalloc(sync_recv_mesg_maxlen, GFP_ATOMIC))) { | ||
765 | IP_VS_ERR("sync_backup_loop: kmalloc error\n"); | ||
766 | return; | ||
767 | } | ||
768 | |||
769 | /* create the receiving multicast socket */ | ||
770 | sock = make_receive_sock(); | ||
771 | if (!sock) | ||
772 | goto out; | ||
773 | |||
774 | IP_VS_INFO("sync thread started: state = BACKUP, mcast_ifn = %s, " | 771 | IP_VS_INFO("sync thread started: state = BACKUP, mcast_ifn = %s, " |
775 | "syncid = %d\n", | 772 | "syncid = %d\n", |
776 | ip_vs_backup_mcast_ifn, ip_vs_backup_syncid); | 773 | ip_vs_backup_mcast_ifn, ip_vs_backup_syncid); |
777 | 774 | ||
778 | for (;;) { | 775 | while (!kthread_should_stop()) { |
779 | /* do you have data now? */ | 776 | wait_event_interruptible(*tinfo->sock->sk->sk_sleep, |
780 | while (!skb_queue_empty(&(sock->sk->sk_receive_queue))) { | 777 | !skb_queue_empty(&tinfo->sock->sk->sk_receive_queue) |
781 | if ((len = | 778 | || kthread_should_stop()); |
782 | ip_vs_receive(sock, buf, | 779 | |
783 | sync_recv_mesg_maxlen)) <= 0) { | 780 | /* do we have data now? */ |
781 | while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) { | ||
782 | len = ip_vs_receive(tinfo->sock, tinfo->buf, | ||
783 | sync_recv_mesg_maxlen); | ||
784 | if (len <= 0) { | ||
784 | IP_VS_ERR("receiving message error\n"); | 785 | IP_VS_ERR("receiving message error\n"); |
785 | break; | 786 | break; |
786 | } | 787 | } |
787 | /* disable bottom half, because it accessed the data | 788 | |
789 | /* disable bottom half, because it accesses the data | ||
788 | shared by softirq while getting/creating conns */ | 790 | shared by softirq while getting/creating conns */ |
789 | local_bh_disable(); | 791 | local_bh_disable(); |
790 | ip_vs_process_message(buf, len); | 792 | ip_vs_process_message(tinfo->buf, len); |
791 | local_bh_enable(); | 793 | local_bh_enable(); |
792 | } | 794 | } |
793 | |||
794 | if (stop_backup_sync) | ||
795 | break; | ||
796 | |||
797 | msleep_interruptible(1000); | ||
798 | } | 795 | } |
799 | 796 | ||
800 | /* release the sending multicast socket */ | 797 | /* release the sending multicast socket */ |
801 | sock_release(sock); | 798 | sock_release(tinfo->sock); |
799 | kfree(tinfo->buf); | ||
800 | kfree(tinfo); | ||
802 | 801 | ||
803 | out: | 802 | return 0; |
804 | kfree(buf); | ||
805 | } | 803 | } |
806 | 804 | ||
807 | 805 | ||
808 | static void set_sync_pid(int sync_state, pid_t sync_pid) | 806 | int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) |
809 | { | ||
810 | if (sync_state == IP_VS_STATE_MASTER) | ||
811 | sync_master_pid = sync_pid; | ||
812 | else if (sync_state == IP_VS_STATE_BACKUP) | ||
813 | sync_backup_pid = sync_pid; | ||
814 | } | ||
815 | |||
816 | static void set_stop_sync(int sync_state, int set) | ||
817 | { | 807 | { |
818 | if (sync_state == IP_VS_STATE_MASTER) | 808 | struct ip_vs_sync_thread_data *tinfo; |
819 | stop_master_sync = set; | 809 | struct task_struct **realtask, *task; |
820 | else if (sync_state == IP_VS_STATE_BACKUP) | 810 | struct socket *sock; |
821 | stop_backup_sync = set; | 811 | char *name, *buf = NULL; |
822 | else { | 812 | int (*threadfn)(void *data); |
823 | stop_master_sync = set; | 813 | int result = -ENOMEM; |
824 | stop_backup_sync = set; | ||
825 | } | ||
826 | } | ||
827 | 814 | ||
828 | static int sync_thread(void *startup) | 815 | IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current)); |
829 | { | 816 | IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n", |
830 | DECLARE_WAITQUEUE(wait, current); | 817 | sizeof(struct ip_vs_sync_conn)); |
831 | mm_segment_t oldmm; | ||
832 | int state; | ||
833 | const char *name; | ||
834 | struct ip_vs_sync_thread_data *tinfo = startup; | ||
835 | 818 | ||
836 | /* increase the module use count */ | 819 | if (state == IP_VS_STATE_MASTER) { |
837 | ip_vs_use_count_inc(); | 820 | if (sync_master_thread) |
821 | return -EEXIST; | ||
838 | 822 | ||
839 | if (ip_vs_sync_state & IP_VS_STATE_MASTER && !sync_master_pid) { | 823 | strlcpy(ip_vs_master_mcast_ifn, mcast_ifn, |
840 | state = IP_VS_STATE_MASTER; | 824 | sizeof(ip_vs_master_mcast_ifn)); |
825 | ip_vs_master_syncid = syncid; | ||
826 | realtask = &sync_master_thread; | ||
841 | name = "ipvs_syncmaster"; | 827 | name = "ipvs_syncmaster"; |
842 | } else if (ip_vs_sync_state & IP_VS_STATE_BACKUP && !sync_backup_pid) { | 828 | threadfn = sync_thread_master; |
843 | state = IP_VS_STATE_BACKUP; | 829 | sock = make_send_sock(); |
830 | } else if (state == IP_VS_STATE_BACKUP) { | ||
831 | if (sync_backup_thread) | ||
832 | return -EEXIST; | ||
833 | |||
834 | strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn, | ||
835 | sizeof(ip_vs_backup_mcast_ifn)); | ||
836 | ip_vs_backup_syncid = syncid; | ||
837 | realtask = &sync_backup_thread; | ||
844 | name = "ipvs_syncbackup"; | 838 | name = "ipvs_syncbackup"; |
839 | threadfn = sync_thread_backup; | ||
840 | sock = make_receive_sock(); | ||
845 | } else { | 841 | } else { |
846 | IP_VS_BUG(); | ||
847 | ip_vs_use_count_dec(); | ||
848 | return -EINVAL; | 842 | return -EINVAL; |
849 | } | 843 | } |
850 | 844 | ||
851 | daemonize(name); | 845 | if (IS_ERR(sock)) { |
852 | 846 | result = PTR_ERR(sock); | |
853 | oldmm = get_fs(); | 847 | goto out; |
854 | set_fs(KERNEL_DS); | 848 | } |
855 | |||
856 | /* Block all signals */ | ||
857 | spin_lock_irq(&current->sighand->siglock); | ||
858 | siginitsetinv(&current->blocked, 0); | ||
859 | recalc_sigpending(); | ||
860 | spin_unlock_irq(&current->sighand->siglock); | ||
861 | 849 | ||
862 | /* set the maximum length of sync message */ | ||
863 | set_sync_mesg_maxlen(state); | 850 | set_sync_mesg_maxlen(state); |
851 | if (state == IP_VS_STATE_BACKUP) { | ||
852 | buf = kmalloc(sync_recv_mesg_maxlen, GFP_KERNEL); | ||
853 | if (!buf) | ||
854 | goto outsocket; | ||
855 | } | ||
864 | 856 | ||
865 | /* set up multicast address */ | 857 | tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL); |
866 | mcast_addr.sin_family = AF_INET; | 858 | if (!tinfo) |
867 | mcast_addr.sin_port = htons(IP_VS_SYNC_PORT); | 859 | goto outbuf; |
868 | mcast_addr.sin_addr.s_addr = htonl(IP_VS_SYNC_GROUP); | ||
869 | |||
870 | add_wait_queue(&sync_wait, &wait); | ||
871 | |||
872 | set_sync_pid(state, task_pid_nr(current)); | ||
873 | complete(tinfo->startup); | ||
874 | |||
875 | /* | ||
876 | * once we call the completion queue above, we should | ||
877 | * null out that reference, since its allocated on the | ||
878 | * stack of the creating kernel thread | ||
879 | */ | ||
880 | tinfo->startup = NULL; | ||
881 | |||
882 | /* processing master/backup loop here */ | ||
883 | if (state == IP_VS_STATE_MASTER) | ||
884 | sync_master_loop(); | ||
885 | else if (state == IP_VS_STATE_BACKUP) | ||
886 | sync_backup_loop(); | ||
887 | else IP_VS_BUG(); | ||
888 | |||
889 | remove_wait_queue(&sync_wait, &wait); | ||
890 | |||
891 | /* thread exits */ | ||
892 | |||
893 | /* | ||
894 | * If we weren't explicitly stopped, then we | ||
895 | * exited in error, and should undo our state | ||
896 | */ | ||
897 | if ((!stop_master_sync) && (!stop_backup_sync)) | ||
898 | ip_vs_sync_state -= tinfo->state; | ||
899 | 860 | ||
900 | set_sync_pid(state, 0); | 861 | tinfo->sock = sock; |
901 | IP_VS_INFO("sync thread stopped!\n"); | 862 | tinfo->buf = buf; |
902 | 863 | ||
903 | set_fs(oldmm); | 864 | task = kthread_run(threadfn, tinfo, name); |
865 | if (IS_ERR(task)) { | ||
866 | result = PTR_ERR(task); | ||
867 | goto outtinfo; | ||
868 | } | ||
904 | 869 | ||
905 | /* decrease the module use count */ | 870 | /* mark as active */ |
906 | ip_vs_use_count_dec(); | 871 | *realtask = task; |
872 | ip_vs_sync_state |= state; | ||
907 | 873 | ||
908 | set_stop_sync(state, 0); | 874 | /* increase the module use count */ |
909 | wake_up(&stop_sync_wait); | 875 | ip_vs_use_count_inc(); |
910 | 876 | ||
911 | /* | ||
912 | * we need to free the structure that was allocated | ||
913 | * for us in start_sync_thread | ||
914 | */ | ||
915 | kfree(tinfo); | ||
916 | return 0; | 877 | return 0; |
917 | } | ||
918 | |||
919 | |||
920 | static int fork_sync_thread(void *startup) | ||
921 | { | ||
922 | pid_t pid; | ||
923 | |||
924 | /* fork the sync thread here, then the parent process of the | ||
925 | sync thread is the init process after this thread exits. */ | ||
926 | repeat: | ||
927 | if ((pid = kernel_thread(sync_thread, startup, 0)) < 0) { | ||
928 | IP_VS_ERR("could not create sync_thread due to %d... " | ||
929 | "retrying.\n", pid); | ||
930 | msleep_interruptible(1000); | ||
931 | goto repeat; | ||
932 | } | ||
933 | 878 | ||
934 | return 0; | 879 | outtinfo: |
880 | kfree(tinfo); | ||
881 | outbuf: | ||
882 | kfree(buf); | ||
883 | outsocket: | ||
884 | sock_release(sock); | ||
885 | out: | ||
886 | return result; | ||
935 | } | 887 | } |
936 | 888 | ||
937 | 889 | ||
938 | int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) | 890 | int stop_sync_thread(int state) |
939 | { | 891 | { |
940 | DECLARE_COMPLETION_ONSTACK(startup); | ||
941 | pid_t pid; | ||
942 | struct ip_vs_sync_thread_data *tinfo; | ||
943 | |||
944 | if ((state == IP_VS_STATE_MASTER && sync_master_pid) || | ||
945 | (state == IP_VS_STATE_BACKUP && sync_backup_pid)) | ||
946 | return -EEXIST; | ||
947 | |||
948 | /* | ||
949 | * Note that tinfo will be freed in sync_thread on exit | ||
950 | */ | ||
951 | tinfo = kmalloc(sizeof(struct ip_vs_sync_thread_data), GFP_KERNEL); | ||
952 | if (!tinfo) | ||
953 | return -ENOMEM; | ||
954 | |||
955 | IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current)); | 892 | IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current)); |
956 | IP_VS_DBG(7, "Each ip_vs_sync_conn entry need %Zd bytes\n", | ||
957 | sizeof(struct ip_vs_sync_conn)); | ||
958 | 893 | ||
959 | ip_vs_sync_state |= state; | ||
960 | if (state == IP_VS_STATE_MASTER) { | 894 | if (state == IP_VS_STATE_MASTER) { |
961 | strlcpy(ip_vs_master_mcast_ifn, mcast_ifn, | 895 | if (!sync_master_thread) |
962 | sizeof(ip_vs_master_mcast_ifn)); | 896 | return -ESRCH; |
963 | ip_vs_master_syncid = syncid; | ||
964 | } else { | ||
965 | strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn, | ||
966 | sizeof(ip_vs_backup_mcast_ifn)); | ||
967 | ip_vs_backup_syncid = syncid; | ||
968 | } | ||
969 | |||
970 | tinfo->state = state; | ||
971 | tinfo->startup = &startup; | ||
972 | |||
973 | repeat: | ||
974 | if ((pid = kernel_thread(fork_sync_thread, tinfo, 0)) < 0) { | ||
975 | IP_VS_ERR("could not create fork_sync_thread due to %d... " | ||
976 | "retrying.\n", pid); | ||
977 | msleep_interruptible(1000); | ||
978 | goto repeat; | ||
979 | } | ||
980 | |||
981 | wait_for_completion(&startup); | ||
982 | |||
983 | return 0; | ||
984 | } | ||
985 | 897 | ||
898 | IP_VS_INFO("stopping master sync thread %d ...\n", | ||
899 | task_pid_nr(sync_master_thread)); | ||
986 | 900 | ||
987 | int stop_sync_thread(int state) | 901 | /* |
988 | { | 902 | * The lock synchronizes with sb_queue_tail(), so that we don't |
989 | DECLARE_WAITQUEUE(wait, current); | 903 | * add sync buffers to the queue, when we are already in |
904 | * progress of stopping the master sync daemon. | ||
905 | */ | ||
990 | 906 | ||
991 | if ((state == IP_VS_STATE_MASTER && !sync_master_pid) || | 907 | spin_lock(&ip_vs_sync_lock); |
992 | (state == IP_VS_STATE_BACKUP && !sync_backup_pid)) | 908 | ip_vs_sync_state &= ~IP_VS_STATE_MASTER; |
993 | return -ESRCH; | 909 | spin_unlock(&ip_vs_sync_lock); |
910 | kthread_stop(sync_master_thread); | ||
911 | sync_master_thread = NULL; | ||
912 | } else if (state == IP_VS_STATE_BACKUP) { | ||
913 | if (!sync_backup_thread) | ||
914 | return -ESRCH; | ||
915 | |||
916 | IP_VS_INFO("stopping backup sync thread %d ...\n", | ||
917 | task_pid_nr(sync_backup_thread)); | ||
918 | |||
919 | ip_vs_sync_state &= ~IP_VS_STATE_BACKUP; | ||
920 | kthread_stop(sync_backup_thread); | ||
921 | sync_backup_thread = NULL; | ||
922 | } else { | ||
923 | return -EINVAL; | ||
924 | } | ||
994 | 925 | ||
995 | IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current)); | 926 | /* decrease the module use count */ |
996 | IP_VS_INFO("stopping sync thread %d ...\n", | 927 | ip_vs_use_count_dec(); |
997 | (state == IP_VS_STATE_MASTER) ? | ||
998 | sync_master_pid : sync_backup_pid); | ||
999 | |||
1000 | __set_current_state(TASK_UNINTERRUPTIBLE); | ||
1001 | add_wait_queue(&stop_sync_wait, &wait); | ||
1002 | set_stop_sync(state, 1); | ||
1003 | ip_vs_sync_state -= state; | ||
1004 | wake_up(&sync_wait); | ||
1005 | schedule(); | ||
1006 | __set_current_state(TASK_RUNNING); | ||
1007 | remove_wait_queue(&stop_sync_wait, &wait); | ||
1008 | |||
1009 | /* Note: no need to reap the sync thread, because its parent | ||
1010 | process is the init process */ | ||
1011 | |||
1012 | if ((state == IP_VS_STATE_MASTER && stop_master_sync) || | ||
1013 | (state == IP_VS_STATE_BACKUP && stop_backup_sync)) | ||
1014 | IP_VS_BUG(); | ||
1015 | 928 | ||
1016 | return 0; | 929 | return 0; |
1017 | } | 930 | } |