author	Sven Wegener <sven.wegener@stealer.net>	2008-07-16 07:13:50 -0400
committer	Sven Wegener <sven.wegener@stealer.net>	2008-07-16 18:33:20 -0400
commit	998e7a76804b7a273a0460c2cdd5a51fa9856717 (patch)
tree	42c5617f9fc180457e7f6f98326edf489a671086
parent	e6dd731c75cba986a485924f908e6e05b088ea9e (diff)
ipvs: Use kthread_run() instead of doing a double-fork via kernel_thread()
This also moves the setup code out of the daemons, so that we're able to
return proper error codes to user space. The current code will return
success to user space when the daemon is started with an invalid mcast
interface. With these changes we get an appropriate "No such device" error.

We no longer need our own completion to be sure the daemons are actually
running, because they no longer contain code that can fail and
kthread_run() takes care of the rest.

Signed-off-by: Sven Wegener <sven.wegener@stealer.net>
Acked-by: Simon Horman <horms@verge.net.au>
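The pattern the patch adopts is easy to see in isolation. Below is a minimal,
self-contained sketch (hypothetical demo_* names, not code from this patch):
all fallible setup runs in the caller, so its error code can propagate to user
space, and the thread body only contains code that cannot fail.
kthread_run(), kthread_should_stop() and kthread_stop() are the real kthread
API; everything else here is illustrative.

#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/err.h>

struct demo_tinfo {		/* handed over to the thread, freed there */
	char *buf;
};

static struct task_struct *demo_task;

static int demo_threadfn(void *data)
{
	struct demo_tinfo *tinfo = data;

	/* kthread_stop() wakes us up and makes kthread_should_stop() true */
	while (!kthread_should_stop())
		msleep_interruptible(1000);

	kfree(tinfo->buf);
	kfree(tinfo);
	return 0;
}

int demo_start(void)
{
	struct demo_tinfo *tinfo;
	struct task_struct *task;

	if (demo_task)
		return -EEXIST;

	/* all fallible setup happens here, where errors can be returned */
	tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
	if (!tinfo)
		return -ENOMEM;

	tinfo->buf = kmalloc(4096, GFP_KERNEL);
	if (!tinfo->buf) {
		kfree(tinfo);
		return -ENOMEM;
	}

	task = kthread_run(demo_threadfn, tinfo, "demo_thread");
	if (IS_ERR(task)) {
		kfree(tinfo->buf);
		kfree(tinfo);
		return PTR_ERR(task);
	}
	demo_task = task;
	return 0;
}

int demo_stop(void)
{
	if (!demo_task)
		return -ESRCH;
	kthread_stop(demo_task);	/* blocks until demo_threadfn() returns */
	demo_task = NULL;
	return 0;
}

Compare this with the removed code in the diff below: the old double fork via
kernel_thread() detached the daemons from the caller, so failures of
make_send_sock()/make_receive_sock() inside the daemon could never reach the
caller, and a completion plus pid bookkeeping was needed just to learn that
the thread had started at all.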
-rw-r--r--	net/ipv4/ipvs/ip_vs_sync.c	| 369
1 file changed, 136 insertions(+), 233 deletions(-)
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index 60b96823c9ae..550563a56607 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -28,10 +28,10 @@
 #include <linux/igmp.h>                 /* for ip_mc_join_group */
 #include <linux/udp.h>
 #include <linux/err.h>
+#include <linux/kthread.h>
 
 #include <net/ip.h>
 #include <net/sock.h>
-#include <asm/uaccess.h>                /* for get_fs and set_fs */
 
 #include <net/ip_vs.h>
 
@@ -67,8 +67,8 @@ struct ip_vs_sync_conn_options {
 };
 
 struct ip_vs_sync_thread_data {
-	struct completion *startup;
-	int state;
+	struct socket *sock;
+	char *buf;
 };
 
 #define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn))
@@ -139,6 +139,10 @@ volatile int ip_vs_backup_syncid = 0;
 char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
 char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
 
+/* sync daemon tasks */
+static struct task_struct *sync_master_thread;
+static struct task_struct *sync_backup_thread;
+
 /* multicast addr */
 static struct sockaddr_in mcast_addr = {
 	.sin_family	= AF_INET,
@@ -147,14 +151,7 @@ static struct sockaddr_in mcast_addr = {
 };
 
 
-static inline void sb_queue_tail(struct ip_vs_sync_buff *sb)
-{
-	spin_lock(&ip_vs_sync_lock);
-	list_add_tail(&sb->list, &ip_vs_sync_queue);
-	spin_unlock(&ip_vs_sync_lock);
-}
-
-static inline struct ip_vs_sync_buff * sb_dequeue(void)
+static inline struct ip_vs_sync_buff *sb_dequeue(void)
 {
 	struct ip_vs_sync_buff *sb;
 
@@ -198,6 +195,16 @@ static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb)
 	kfree(sb);
 }
 
+static inline void sb_queue_tail(struct ip_vs_sync_buff *sb)
+{
+	spin_lock(&ip_vs_sync_lock);
+	if (ip_vs_sync_state & IP_VS_STATE_MASTER)
+		list_add_tail(&sb->list, &ip_vs_sync_queue);
+	else
+		ip_vs_sync_buff_release(sb);
+	spin_unlock(&ip_vs_sync_lock);
+}
+
 /*
  * Get the current sync buffer if it has been created for more
  * than the specified time or the specified time is zero.
@@ -712,43 +719,28 @@ ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
 }
 
 
-static DECLARE_WAIT_QUEUE_HEAD(sync_wait);
-static pid_t sync_master_pid = 0;
-static pid_t sync_backup_pid = 0;
-
-static DECLARE_WAIT_QUEUE_HEAD(stop_sync_wait);
-static int stop_master_sync = 0;
-static int stop_backup_sync = 0;
-
-static void sync_master_loop(void)
+static int sync_thread_master(void *data)
 {
-	struct socket *sock;
+	struct ip_vs_sync_thread_data *tinfo = data;
 	struct ip_vs_sync_buff *sb;
 
-	/* create the sending multicast socket */
-	sock = make_send_sock();
-	if (IS_ERR(sock))
-		return;
-
 	IP_VS_INFO("sync thread started: state = MASTER, mcast_ifn = %s, "
 		   "syncid = %d\n",
 		   ip_vs_master_mcast_ifn, ip_vs_master_syncid);
 
-	for (;;) {
-		while ((sb=sb_dequeue())) {
-			ip_vs_send_sync_msg(sock, sb->mesg);
+	while (!kthread_should_stop()) {
+		while ((sb = sb_dequeue())) {
+			ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
 			ip_vs_sync_buff_release(sb);
 		}
 
 		/* check if entries stay in curr_sb for 2 seconds */
-		if ((sb = get_curr_sync_buff(2*HZ))) {
-			ip_vs_send_sync_msg(sock, sb->mesg);
+		sb = get_curr_sync_buff(2 * HZ);
+		if (sb) {
+			ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
 			ip_vs_sync_buff_release(sb);
 		}
 
-		if (stop_master_sync)
-			break;
-
 		msleep_interruptible(1000);
 	}
 
@@ -763,262 +755,173 @@ static void sync_master_loop(void)
 	}
 
 	/* release the sending multicast socket */
-	sock_release(sock);
+	sock_release(tinfo->sock);
+	kfree(tinfo);
+
+	return 0;
 }
 
 
-static void sync_backup_loop(void)
+static int sync_thread_backup(void *data)
 {
-	struct socket *sock;
-	char *buf;
+	struct ip_vs_sync_thread_data *tinfo = data;
 	int len;
 
-	if (!(buf = kmalloc(sync_recv_mesg_maxlen, GFP_ATOMIC))) {
-		IP_VS_ERR("sync_backup_loop: kmalloc error\n");
-		return;
-	}
-
-	/* create the receiving multicast socket */
-	sock = make_receive_sock();
-	if (IS_ERR(sock))
-		goto out;
-
 	IP_VS_INFO("sync thread started: state = BACKUP, mcast_ifn = %s, "
 		   "syncid = %d\n",
 		   ip_vs_backup_mcast_ifn, ip_vs_backup_syncid);
 
-	for (;;) {
-		/* do you have data now? */
-		while (!skb_queue_empty(&(sock->sk->sk_receive_queue))) {
-			if ((len =
-			     ip_vs_receive(sock, buf,
-					   sync_recv_mesg_maxlen)) <= 0) {
+	while (!kthread_should_stop()) {
+		/* do we have data now? */
+		while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
+			len = ip_vs_receive(tinfo->sock, tinfo->buf,
+					sync_recv_mesg_maxlen);
+			if (len <= 0) {
 				IP_VS_ERR("receiving message error\n");
 				break;
 			}
-			/* disable bottom half, because it accessed the data
+
+			/* disable bottom half, because it accesses the data
 			   shared by softirq while getting/creating conns */
 			local_bh_disable();
-			ip_vs_process_message(buf, len);
+			ip_vs_process_message(tinfo->buf, len);
 			local_bh_enable();
 		}
 
-		if (stop_backup_sync)
-			break;
-
 		msleep_interruptible(1000);
 	}
 
 	/* release the sending multicast socket */
-	sock_release(sock);
+	sock_release(tinfo->sock);
+	kfree(tinfo->buf);
+	kfree(tinfo);
 
-  out:
-	kfree(buf);
+	return 0;
 }
 
 
-static void set_sync_pid(int sync_state, pid_t sync_pid)
-{
-	if (sync_state == IP_VS_STATE_MASTER)
-		sync_master_pid = sync_pid;
-	else if (sync_state == IP_VS_STATE_BACKUP)
-		sync_backup_pid = sync_pid;
-}
-
-static void set_stop_sync(int sync_state, int set)
+int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
 {
-	if (sync_state == IP_VS_STATE_MASTER)
-		stop_master_sync = set;
-	else if (sync_state == IP_VS_STATE_BACKUP)
-		stop_backup_sync = set;
-	else {
-		stop_master_sync = set;
-		stop_backup_sync = set;
-	}
-}
+	struct ip_vs_sync_thread_data *tinfo;
+	struct task_struct **realtask, *task;
+	struct socket *sock;
+	char *name, *buf = NULL;
+	int (*threadfn)(void *data);
+	int result = -ENOMEM;
 
-static int sync_thread(void *startup)
-{
-	DECLARE_WAITQUEUE(wait, current);
-	mm_segment_t oldmm;
-	int state;
-	const char *name;
-	struct ip_vs_sync_thread_data *tinfo = startup;
+	IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current));
+	IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
+		  sizeof(struct ip_vs_sync_conn));
 
-	/* increase the module use count */
-	ip_vs_use_count_inc();
+	if (state == IP_VS_STATE_MASTER) {
+		if (sync_master_thread)
+			return -EEXIST;
 
-	if (ip_vs_sync_state & IP_VS_STATE_MASTER && !sync_master_pid) {
-		state = IP_VS_STATE_MASTER;
+		strlcpy(ip_vs_master_mcast_ifn, mcast_ifn,
+			sizeof(ip_vs_master_mcast_ifn));
+		ip_vs_master_syncid = syncid;
+		realtask = &sync_master_thread;
 		name = "ipvs_syncmaster";
-	} else if (ip_vs_sync_state & IP_VS_STATE_BACKUP && !sync_backup_pid) {
-		state = IP_VS_STATE_BACKUP;
+		threadfn = sync_thread_master;
+		sock = make_send_sock();
+	} else if (state == IP_VS_STATE_BACKUP) {
+		if (sync_backup_thread)
+			return -EEXIST;
+
+		strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn,
+			sizeof(ip_vs_backup_mcast_ifn));
+		ip_vs_backup_syncid = syncid;
+		realtask = &sync_backup_thread;
 		name = "ipvs_syncbackup";
+		threadfn = sync_thread_backup;
+		sock = make_receive_sock();
 	} else {
-		IP_VS_BUG();
-		ip_vs_use_count_dec();
 		return -EINVAL;
 	}
 
-	daemonize(name);
-
-	oldmm = get_fs();
-	set_fs(KERNEL_DS);
-
-	/* Block all signals */
-	spin_lock_irq(&current->sighand->siglock);
-	siginitsetinv(&current->blocked, 0);
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
+	if (IS_ERR(sock)) {
+		result = PTR_ERR(sock);
+		goto out;
+	}
 
-	/* set the maximum length of sync message */
 	set_sync_mesg_maxlen(state);
+	if (state == IP_VS_STATE_BACKUP) {
+		buf = kmalloc(sync_recv_mesg_maxlen, GFP_KERNEL);
+		if (!buf)
+			goto outsocket;
+	}
 
-	add_wait_queue(&sync_wait, &wait);
-
-	set_sync_pid(state, task_pid_nr(current));
-	complete(tinfo->startup);
-
-	/*
-	 * once we call the completion queue above, we should
-	 * null out that reference, since its allocated on the
-	 * stack of the creating kernel thread
-	 */
-	tinfo->startup = NULL;
-
-	/* processing master/backup loop here */
-	if (state == IP_VS_STATE_MASTER)
-		sync_master_loop();
-	else if (state == IP_VS_STATE_BACKUP)
-		sync_backup_loop();
-	else IP_VS_BUG();
-
-	remove_wait_queue(&sync_wait, &wait);
-
-	/* thread exits */
-
-	/*
-	 * If we weren't explicitly stopped, then we
-	 * exited in error, and should undo our state
-	 */
-	if ((!stop_master_sync) && (!stop_backup_sync))
-		ip_vs_sync_state -= tinfo->state;
+	tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
+	if (!tinfo)
+		goto outbuf;
 
-	set_sync_pid(state, 0);
-	IP_VS_INFO("sync thread stopped!\n");
+	tinfo->sock = sock;
+	tinfo->buf = buf;
 
-	set_fs(oldmm);
+	task = kthread_run(threadfn, tinfo, name);
+	if (IS_ERR(task)) {
+		result = PTR_ERR(task);
+		goto outtinfo;
+	}
 
-	/* decrease the module use count */
-	ip_vs_use_count_dec();
+	/* mark as active */
+	*realtask = task;
+	ip_vs_sync_state |= state;
 
-	set_stop_sync(state, 0);
-	wake_up(&stop_sync_wait);
+	/* increase the module use count */
+	ip_vs_use_count_inc();
 
-	/*
-	 * we need to free the structure that was allocated
-	 * for us in start_sync_thread
-	 */
-	kfree(tinfo);
 	return 0;
-}
-
-
-static int fork_sync_thread(void *startup)
-{
-	pid_t pid;
-
-	/* fork the sync thread here, then the parent process of the
-	   sync thread is the init process after this thread exits. */
-  repeat:
-	if ((pid = kernel_thread(sync_thread, startup, 0)) < 0) {
-		IP_VS_ERR("could not create sync_thread due to %d... "
-			  "retrying.\n", pid);
-		msleep_interruptible(1000);
-		goto repeat;
-	}
 
-	return 0;
+outtinfo:
+	kfree(tinfo);
+outbuf:
+	kfree(buf);
+outsocket:
+	sock_release(sock);
+out:
+	return result;
 }
 
 
-int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
+int stop_sync_thread(int state)
 {
-	DECLARE_COMPLETION_ONSTACK(startup);
-	pid_t pid;
-	struct ip_vs_sync_thread_data *tinfo;
-
-	if ((state == IP_VS_STATE_MASTER && sync_master_pid) ||
-	    (state == IP_VS_STATE_BACKUP && sync_backup_pid))
-		return -EEXIST;
-
-	/*
-	 * Note that tinfo will be freed in sync_thread on exit
-	 */
-	tinfo = kmalloc(sizeof(struct ip_vs_sync_thread_data), GFP_KERNEL);
-	if (!tinfo)
-		return -ENOMEM;
-
 	IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current));
-	IP_VS_DBG(7, "Each ip_vs_sync_conn entry need %Zd bytes\n",
-		  sizeof(struct ip_vs_sync_conn));
 
-	ip_vs_sync_state |= state;
 	if (state == IP_VS_STATE_MASTER) {
-		strlcpy(ip_vs_master_mcast_ifn, mcast_ifn,
-			sizeof(ip_vs_master_mcast_ifn));
-		ip_vs_master_syncid = syncid;
-	} else {
-		strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn,
-			sizeof(ip_vs_backup_mcast_ifn));
-		ip_vs_backup_syncid = syncid;
-	}
-
-	tinfo->state = state;
-	tinfo->startup = &startup;
+		if (!sync_master_thread)
+			return -ESRCH;
 
-  repeat:
-	if ((pid = kernel_thread(fork_sync_thread, tinfo, 0)) < 0) {
-		IP_VS_ERR("could not create fork_sync_thread due to %d... "
-			  "retrying.\n", pid);
-		msleep_interruptible(1000);
-		goto repeat;
-	}
+		IP_VS_INFO("stopping master sync thread %d ...\n",
			   task_pid_nr(sync_master_thread));
 
-	wait_for_completion(&startup);
-
-	return 0;
-}
-
-
-int stop_sync_thread(int state)
-{
-	DECLARE_WAITQUEUE(wait, current);
+		/*
+		 * The lock synchronizes with sb_queue_tail(), so that we don't
+		 * add sync buffers to the queue, when we are already in
+		 * progress of stopping the master sync daemon.
+		 */
 
-	if ((state == IP_VS_STATE_MASTER && !sync_master_pid) ||
-	    (state == IP_VS_STATE_BACKUP && !sync_backup_pid))
-		return -ESRCH;
+		spin_lock(&ip_vs_sync_lock);
+		ip_vs_sync_state &= ~IP_VS_STATE_MASTER;
+		spin_unlock(&ip_vs_sync_lock);
+		kthread_stop(sync_master_thread);
+		sync_master_thread = NULL;
+	} else if (state == IP_VS_STATE_BACKUP) {
+		if (!sync_backup_thread)
+			return -ESRCH;
+
+		IP_VS_INFO("stopping backup sync thread %d ...\n",
+			   task_pid_nr(sync_backup_thread));
+
+		ip_vs_sync_state &= ~IP_VS_STATE_BACKUP;
+		kthread_stop(sync_backup_thread);
+		sync_backup_thread = NULL;
+	} else {
+		return -EINVAL;
+	}
 
-	IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current));
-	IP_VS_INFO("stopping sync thread %d ...\n",
-		   (state == IP_VS_STATE_MASTER) ?
-		   sync_master_pid : sync_backup_pid);
-
-	__set_current_state(TASK_UNINTERRUPTIBLE);
-	add_wait_queue(&stop_sync_wait, &wait);
-	set_stop_sync(state, 1);
-	ip_vs_sync_state -= state;
-	wake_up(&sync_wait);
-	schedule();
-	__set_current_state(TASK_RUNNING);
-	remove_wait_queue(&stop_sync_wait, &wait);
-
-	/* Note: no need to reap the sync thread, because its parent
-	   process is the init process */
-
-	if ((state == IP_VS_STATE_MASTER && stop_master_sync) ||
-	    (state == IP_VS_STATE_BACKUP && stop_backup_sync))
-		IP_VS_BUG();
+	/* decrease the module use count */
+	ip_vs_use_count_dec();
 
 	return 0;
 }