diff options
author | Stanislav Kinsbursky <skinsbursky@parallels.com> | 2013-01-04 18:34:55 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-01-04 19:11:45 -0500 |
commit | 4a674f34ba04a002244edaf891b5da7fc1473ae8 (patch) | |
tree | 14544a5d49b4a218bac3f5995503c8e208735cef | |
parent | f9dd87f4738c7555aca2cdf8cb2b2326cafb0cad (diff) |
ipc: introduce message queue copy feature
This patch is required for checkpoint/restore in userspace.
c/r requires some way to get all pending IPC messages without deleting
them from the queue (a checkpoint can fail, in which case the tasks will be
resumed, so the queue has to remain valid).
To achieve this, a new operation flag, MSG_COPY, was introduced for the
sys_msgrcv() system call. If this flag is specified, then mtype is interpreted
as the number (zero-based position in the queue) of the message to copy.
If MSG_COPY is set, then the kernel will allocate a dummy message of the passed
size, and then use the new copy_msg() helper function to copy the desired message
into it (instead of unlinking the message from the queue).
Notes:
1) Return -ENOSYS if MSG_COPY is specified, but
CONFIG_CHECKPOINT_RESTORE is not set.
Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Cc: Serge Hallyn <serge.hallyn@canonical.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | include/uapi/linux/msg.h | 1 | ||||
-rw-r--r-- | ipc/msg.c | 64 | ||||
-rw-r--r-- | ipc/msgutil.c | 38 | ||||
-rw-r--r-- | ipc/util.h | 1 |
4 files changed, 102 insertions, 2 deletions
diff --git a/include/uapi/linux/msg.h b/include/uapi/linux/msg.h index 78dbd2f996a3..22d95c6854e0 100644 --- a/include/uapi/linux/msg.h +++ b/include/uapi/linux/msg.h | |||
@@ -10,6 +10,7 @@ | |||
10 | /* msgrcv options */ | 10 | /* msgrcv options */ |
11 | #define MSG_NOERROR 010000 /* no error if message is too big */ | 11 | #define MSG_NOERROR 010000 /* no error if message is too big */ |
12 | #define MSG_EXCEPT 020000 /* recv any msg except of specified type.*/ | 12 | #define MSG_EXCEPT 020000 /* recv any msg except of specified type.*/ |
13 | #define MSG_COPY 040000 /* copy (not remove) all queue messages */ | ||
13 | 14 | ||
14 | /* Obsolete, used only for backwards compatibility and libc5 compiles */ | 15 | /* Obsolete, used only for backwards compatibility and libc5 compiles */ |
15 | struct msqid_ds { | 16 | struct msqid_ds { |
@@ -769,6 +769,45 @@ static long do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz) | |||
769 | return msgsz; | 769 | return msgsz; |
770 | } | 770 | } |
771 | 771 | ||
772 | #ifdef CONFIG_CHECKPOINT_RESTORE | ||
773 | static inline struct msg_msg *fill_copy(unsigned long copy_nr, | ||
774 | unsigned long msg_nr, | ||
775 | struct msg_msg *msg, | ||
776 | struct msg_msg *copy) | ||
777 | { | ||
778 | if (copy_nr == msg_nr) | ||
779 | return copy_msg(msg, copy); | ||
780 | return NULL; | ||
781 | } | ||
782 | |||
783 | static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz, | ||
784 | int msgflg, long *msgtyp, | ||
785 | unsigned long *copy_number) | ||
786 | { | ||
787 | struct msg_msg *copy; | ||
788 | |||
789 | *copy_number = *msgtyp; | ||
790 | *msgtyp = 0; | ||
791 | /* | ||
792 | * Create dummy message to copy real message to. | ||
793 | */ | ||
794 | copy = load_msg(buf, bufsz); | ||
795 | if (!IS_ERR(copy)) | ||
796 | copy->m_ts = bufsz; | ||
797 | return copy; | ||
798 | } | ||
799 | |||
800 | static inline void free_copy(int msgflg, struct msg_msg *copy) | ||
801 | { | ||
802 | if (msgflg & MSG_COPY) | ||
803 | free_msg(copy); | ||
804 | } | ||
805 | #else | ||
806 | #define free_copy(msgflg, copy) do {} while (0) | ||
807 | #define prepare_copy(buf, sz, msgflg, msgtyp, copy_nr) ERR_PTR(-ENOSYS) | ||
808 | #define fill_copy(copy_nr, msg_nr, msg, copy) NULL | ||
809 | #endif | ||
810 | |||
772 | long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, | 811 | long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, |
773 | int msgflg, | 812 | int msgflg, |
774 | long (*msg_handler)(void __user *, struct msg_msg *, size_t)) | 813 | long (*msg_handler)(void __user *, struct msg_msg *, size_t)) |
@@ -777,19 +816,29 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, | |||
777 | struct msg_msg *msg; | 816 | struct msg_msg *msg; |
778 | int mode; | 817 | int mode; |
779 | struct ipc_namespace *ns; | 818 | struct ipc_namespace *ns; |
819 | struct msg_msg *copy; | ||
820 | unsigned long __maybe_unused copy_number; | ||
780 | 821 | ||
781 | if (msqid < 0 || (long) bufsz < 0) | 822 | if (msqid < 0 || (long) bufsz < 0) |
782 | return -EINVAL; | 823 | return -EINVAL; |
824 | if (msgflg & MSG_COPY) { | ||
825 | copy = prepare_copy(buf, bufsz, msgflg, &msgtyp, &copy_number); | ||
826 | if (IS_ERR(copy)) | ||
827 | return PTR_ERR(copy); | ||
828 | } | ||
783 | mode = convert_mode(&msgtyp, msgflg); | 829 | mode = convert_mode(&msgtyp, msgflg); |
784 | ns = current->nsproxy->ipc_ns; | 830 | ns = current->nsproxy->ipc_ns; |
785 | 831 | ||
786 | msq = msg_lock_check(ns, msqid); | 832 | msq = msg_lock_check(ns, msqid); |
787 | if (IS_ERR(msq)) | 833 | if (IS_ERR(msq)) { |
834 | free_copy(msgflg, copy); | ||
788 | return PTR_ERR(msq); | 835 | return PTR_ERR(msq); |
836 | } | ||
789 | 837 | ||
790 | for (;;) { | 838 | for (;;) { |
791 | struct msg_receiver msr_d; | 839 | struct msg_receiver msr_d; |
792 | struct list_head *tmp; | 840 | struct list_head *tmp; |
841 | long msg_counter = 0; | ||
793 | 842 | ||
794 | msg = ERR_PTR(-EACCES); | 843 | msg = ERR_PTR(-EACCES); |
795 | if (ipcperms(ns, &msq->q_perm, S_IRUGO)) | 844 | if (ipcperms(ns, &msq->q_perm, S_IRUGO)) |
@@ -809,8 +858,15 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, | |||
809 | if (mode == SEARCH_LESSEQUAL && | 858 | if (mode == SEARCH_LESSEQUAL && |
810 | walk_msg->m_type != 1) { | 859 | walk_msg->m_type != 1) { |
811 | msgtyp = walk_msg->m_type - 1; | 860 | msgtyp = walk_msg->m_type - 1; |
861 | } else if (msgflg & MSG_COPY) { | ||
862 | msg = fill_copy(copy_number, | ||
863 | msg_counter, | ||
864 | walk_msg, copy); | ||
865 | if (msg) | ||
866 | break; | ||
812 | } else | 867 | } else |
813 | break; | 868 | break; |
869 | msg_counter++; | ||
814 | } | 870 | } |
815 | tmp = tmp->next; | 871 | tmp = tmp->next; |
816 | } | 872 | } |
@@ -823,6 +879,8 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, | |||
823 | msg = ERR_PTR(-E2BIG); | 879 | msg = ERR_PTR(-E2BIG); |
824 | goto out_unlock; | 880 | goto out_unlock; |
825 | } | 881 | } |
882 | if (msgflg & MSG_COPY) | ||
883 | goto out_unlock; | ||
826 | list_del(&msg->m_list); | 884 | list_del(&msg->m_list); |
827 | msq->q_qnum--; | 885 | msq->q_qnum--; |
828 | msq->q_rtime = get_seconds(); | 886 | msq->q_rtime = get_seconds(); |
@@ -906,8 +964,10 @@ out_unlock: | |||
906 | break; | 964 | break; |
907 | } | 965 | } |
908 | } | 966 | } |
909 | if (IS_ERR(msg)) | 967 | if (IS_ERR(msg)) { |
968 | free_copy(msgflg, copy); | ||
910 | return PTR_ERR(msg); | 969 | return PTR_ERR(msg); |
970 | } | ||
911 | 971 | ||
912 | bufsz = msg_handler(buf, msg, bufsz); | 972 | bufsz = msg_handler(buf, msg, bufsz); |
913 | free_msg(msg); | 973 | free_msg(msg); |
diff --git a/ipc/msgutil.c b/ipc/msgutil.c index 6471f1bdae96..7eecdad40efc 100644 --- a/ipc/msgutil.c +++ b/ipc/msgutil.c | |||
@@ -102,7 +102,45 @@ out_err: | |||
102 | free_msg(msg); | 102 | free_msg(msg); |
103 | return ERR_PTR(err); | 103 | return ERR_PTR(err); |
104 | } | 104 | } |
105 | #ifdef CONFIG_CHECKPOINT_RESTORE | ||
106 | struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst) | ||
107 | { | ||
108 | struct msg_msgseg *dst_pseg, *src_pseg; | ||
109 | int len = src->m_ts; | ||
110 | int alen; | ||
111 | |||
112 | BUG_ON(dst == NULL); | ||
113 | if (src->m_ts > dst->m_ts) | ||
114 | return ERR_PTR(-EINVAL); | ||
115 | |||
116 | alen = len; | ||
117 | if (alen > DATALEN_MSG) | ||
118 | alen = DATALEN_MSG; | ||
119 | |||
120 | dst->next = NULL; | ||
121 | dst->security = NULL; | ||
105 | 122 | ||
123 | memcpy(dst + 1, src + 1, alen); | ||
124 | |||
125 | len -= alen; | ||
126 | dst_pseg = dst->next; | ||
127 | src_pseg = src->next; | ||
128 | while (len > 0) { | ||
129 | alen = len; | ||
130 | if (alen > DATALEN_SEG) | ||
131 | alen = DATALEN_SEG; | ||
132 | memcpy(dst_pseg + 1, src_pseg + 1, alen); | ||
133 | dst_pseg = dst_pseg->next; | ||
134 | len -= alen; | ||
135 | src_pseg = src_pseg->next; | ||
136 | } | ||
137 | |||
138 | dst->m_type = src->m_type; | ||
139 | dst->m_ts = src->m_ts; | ||
140 | |||
141 | return dst; | ||
142 | } | ||
143 | #endif | ||
106 | int store_msg(void __user *dest, struct msg_msg *msg, int len) | 144 | int store_msg(void __user *dest, struct msg_msg *msg, int len) |
107 | { | 145 | { |
108 | int alen; | 146 | int alen; |
diff --git a/ipc/util.h b/ipc/util.h index a61e0ca2bffd..eeb79a1fbd83 100644 --- a/ipc/util.h +++ b/ipc/util.h | |||
@@ -140,6 +140,7 @@ int ipc_parse_version (int *cmd); | |||
140 | 140 | ||
141 | extern void free_msg(struct msg_msg *msg); | 141 | extern void free_msg(struct msg_msg *msg); |
142 | extern struct msg_msg *load_msg(const void __user *src, int len); | 142 | extern struct msg_msg *load_msg(const void __user *src, int len); |
143 | extern struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst); | ||
143 | extern int store_msg(void __user *dest, struct msg_msg *msg, int len); | 144 | extern int store_msg(void __user *dest, struct msg_msg *msg, int len); |
144 | 145 | ||
145 | extern void recompute_msgmni(struct ipc_namespace *); | 146 | extern void recompute_msgmni(struct ipc_namespace *); |