aboutsummaryrefslogtreecommitdiffstats
path: root/net/sctp
diff options
context:
space:
mode:
authorVlad Yasevich <vladislav.yasevich@hp.com>2007-04-20 15:23:15 -0400
committerDavid S. Miller <davem@sunset.davemloft.net>2007-04-26 01:27:59 -0400
commitb6e1331f3ce25a56edb956054eaf8011654686cb (patch)
tree63995f7e1de7d717df69cb4d138bce3fa4fe77ba /net/sctp
parentc95e939508e64863a1c5c73a9e1a908784e06820 (diff)
[SCTP]: Implement SCTP_FRAGMENT_INTERLEAVE socket option
This option was introduced in draft-ietf-tsvwg-sctpsocket-13. It prevents head-of-line blocking in the case of a one-to-many endpoint. Applications enabling this option really must enable the SCTP_SNDRCV event so that they know which association the data belongs to. Based on an earlier patch by Ivan Skytte Jørgensen. Additionally, this functionality now permits multiple associations on the same endpoint to enter Partial Delivery. Applications should be extra careful, when using this functionality, to track EOR indicators. Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/sctp')
-rw-r--r--net/sctp/socket.c84
-rw-r--r--net/sctp/ulpqueue.c103
2 files changed, 151 insertions, 36 deletions
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index a1d026f12b0e..b4be473c68b0 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2255,7 +2255,7 @@ static int sctp_setsockopt_peer_addr_params(struct sock *sk,
2255 return 0; 2255 return 0;
2256} 2256}
2257 2257
2258/* 7.1.24. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME) 2258/* 7.1.23. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME)
2259 * 2259 *
2260 * This options will get or set the delayed ack timer. The time is set 2260 * This options will get or set the delayed ack timer. The time is set
2261 * in milliseconds. If the assoc_id is 0, then this sets or gets the 2261 * in milliseconds. If the assoc_id is 0, then this sets or gets the
@@ -2792,6 +2792,46 @@ static int sctp_setsockopt_context(struct sock *sk, char __user *optval,
2792 return 0; 2792 return 0;
2793} 2793}
2794 2794
2795/*
2796 * 7.1.24. Get or set fragmented interleave (SCTP_FRAGMENT_INTERLEAVE)
2797 *
2798 * This option will at a minimum specify if the implementation is doing
2799 * fragmented interleave. Fragmented interleave, for a one to many
2800 * socket, is when subsequent calls to receive a message may return
2801 * parts of messages from different associations. Some implementations
2802 * may allow you to turn this value on or off. If so, when turned off,
2803 * no fragment interleave will occur (which will cause a head of line
2804 * blocking amongst multiple associations sharing the same one to many
2805 * socket). When this option is turned on, then each receive call may
2806 * come from a different association (thus the user must receive data
2807 * with the extended calls (e.g. sctp_recvmsg) to keep track of which
2808 * association each receive belongs to).
2809 *
2810 * This option takes a boolean value. A non-zero value indicates that
2811 * fragmented interleave is on. A value of zero indicates that
2812 * fragmented interleave is off.
2813 *
2814 * Note that it is important that an implementation that allows this
2815 * option to be turned on, have it off by default. Otherwise an unaware
2816 * application using the one to many model may become confused and act
2817 * incorrectly.
2818 */
2819static int sctp_setsockopt_fragment_interleave(struct sock *sk,
2820 char __user *optval,
2821 int optlen)
2822{
2823 int val;
2824
2825 if (optlen != sizeof(int))
2826 return -EINVAL;
2827 if (get_user(val, (int __user *)optval))
2828 return -EFAULT;
2829
2830 sctp_sk(sk)->frag_interleave = (val == 0) ? 0 : 1;
2831
2832 return 0;
2833}
2834
2795/* API 6.2 setsockopt(), getsockopt() 2835/* API 6.2 setsockopt(), getsockopt()
2796 * 2836 *
2797 * Applications use setsockopt() and getsockopt() to set or retrieve 2837 * Applications use setsockopt() and getsockopt() to set or retrieve
@@ -2906,7 +2946,9 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname,
2906 case SCTP_CONTEXT: 2946 case SCTP_CONTEXT:
2907 retval = sctp_setsockopt_context(sk, optval, optlen); 2947 retval = sctp_setsockopt_context(sk, optval, optlen);
2908 break; 2948 break;
2909 2949 case SCTP_FRAGMENT_INTERLEAVE:
2950 retval = sctp_setsockopt_fragment_interleave(sk, optval, optlen);
2951 break;
2910 default: 2952 default:
2911 retval = -ENOPROTOOPT; 2953 retval = -ENOPROTOOPT;
2912 break; 2954 break;
@@ -3134,8 +3176,9 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
3134 sp->pf = sctp_get_pf_specific(sk->sk_family); 3176 sp->pf = sctp_get_pf_specific(sk->sk_family);
3135 3177
3136 /* Control variables for partial data delivery. */ 3178 /* Control variables for partial data delivery. */
3137 sp->pd_mode = 0; 3179 atomic_set(&sp->pd_mode, 0);
3138 skb_queue_head_init(&sp->pd_lobby); 3180 skb_queue_head_init(&sp->pd_lobby);
3181 sp->frag_interleave = 0;
3139 3182
3140 /* Create a per socket endpoint structure. Even if we 3183 /* Create a per socket endpoint structure. Even if we
3141 * change the data structure relationships, this may still 3184 * change the data structure relationships, this may still
@@ -3642,7 +3685,7 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len,
3642 return 0; 3685 return 0;
3643} 3686}
3644 3687
3645/* 7.1.24. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME) 3688/* 7.1.23. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME)
3646 * 3689 *
3647 * This options will get or set the delayed ack timer. The time is set 3690 * This options will get or set the delayed ack timer. The time is set
3648 * in milliseconds. If the assoc_id is 0, then this sets or gets the 3691 * in milliseconds. If the assoc_id is 0, then this sets or gets the
@@ -4536,6 +4579,29 @@ static int sctp_getsockopt_maxseg(struct sock *sk, int len,
4536 return 0; 4579 return 0;
4537} 4580}
4538 4581
4582/*
4583 * 7.1.24. Get or set fragmented interleave (SCTP_FRAGMENT_INTERLEAVE)
4584 * (chapter and verse is quoted at sctp_setsockopt_fragment_interleave())
4585 */
4586static int sctp_getsockopt_fragment_interleave(struct sock *sk, int len,
4587 char __user *optval, int __user *optlen)
4588{
4589 int val;
4590
4591 if (len < sizeof(int))
4592 return -EINVAL;
4593
4594 len = sizeof(int);
4595
4596 val = sctp_sk(sk)->frag_interleave;
4597 if (put_user(len, optlen))
4598 return -EFAULT;
4599 if (copy_to_user(optval, &val, len))
4600 return -EFAULT;
4601
4602 return 0;
4603}
4604
4539SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, 4605SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname,
4540 char __user *optval, int __user *optlen) 4606 char __user *optval, int __user *optlen)
4541{ 4607{
@@ -4648,6 +4714,10 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname,
4648 case SCTP_CONTEXT: 4714 case SCTP_CONTEXT:
4649 retval = sctp_getsockopt_context(sk, len, optval, optlen); 4715 retval = sctp_getsockopt_context(sk, len, optval, optlen);
4650 break; 4716 break;
4717 case SCTP_FRAGMENT_INTERLEAVE:
4718 retval = sctp_getsockopt_fragment_interleave(sk, len, optval,
4719 optlen);
4720 break;
4651 default: 4721 default:
4652 retval = -ENOPROTOOPT; 4722 retval = -ENOPROTOOPT;
4653 break; 4723 break;
@@ -5742,9 +5812,9 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
5742 * 3) Peeling off non-partial delivery; move pd_lobby to receive_queue. 5812 * 3) Peeling off non-partial delivery; move pd_lobby to receive_queue.
5743 */ 5813 */
5744 skb_queue_head_init(&newsp->pd_lobby); 5814 skb_queue_head_init(&newsp->pd_lobby);
5745 sctp_sk(newsk)->pd_mode = assoc->ulpq.pd_mode; 5815 atomic_set(&sctp_sk(newsk)->pd_mode, assoc->ulpq.pd_mode);
5746 5816
5747 if (sctp_sk(oldsk)->pd_mode) { 5817 if (atomic_read(&sctp_sk(oldsk)->pd_mode)) {
5748 struct sk_buff_head *queue; 5818 struct sk_buff_head *queue;
5749 5819
5750 /* Decide which queue to move pd_lobby skbs to. */ 5820 /* Decide which queue to move pd_lobby skbs to. */
@@ -5770,7 +5840,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
5770 * delivery to finish. 5840 * delivery to finish.
5771 */ 5841 */
5772 if (assoc->ulpq.pd_mode) 5842 if (assoc->ulpq.pd_mode)
5773 sctp_clear_pd(oldsk); 5843 sctp_clear_pd(oldsk, NULL);
5774 5844
5775 } 5845 }
5776 5846
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index b29e3e4b72c9..ac80c34f6c2c 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -138,18 +138,42 @@ int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
138/* Clear the partial delivery mode for this socket. Note: This 138/* Clear the partial delivery mode for this socket. Note: This
139 * assumes that no association is currently in partial delivery mode. 139 * assumes that no association is currently in partial delivery mode.
140 */ 140 */
141int sctp_clear_pd(struct sock *sk) 141int sctp_clear_pd(struct sock *sk, struct sctp_association *asoc)
142{ 142{
143 struct sctp_sock *sp = sctp_sk(sk); 143 struct sctp_sock *sp = sctp_sk(sk);
144 144
145 sp->pd_mode = 0; 145 if (atomic_dec_and_test(&sp->pd_mode)) {
146 if (!skb_queue_empty(&sp->pd_lobby)) { 146 /* This means there are no other associations in PD, so
147 struct list_head *list; 147 * we can go ahead and clear out the lobby in one shot
148 sctp_skb_list_tail(&sp->pd_lobby, &sk->sk_receive_queue); 148 */
149 list = (struct list_head *)&sctp_sk(sk)->pd_lobby; 149 if (!skb_queue_empty(&sp->pd_lobby)) {
150 INIT_LIST_HEAD(list); 150 struct list_head *list;
151 return 1; 151 sctp_skb_list_tail(&sp->pd_lobby, &sk->sk_receive_queue);
152 list = (struct list_head *)&sctp_sk(sk)->pd_lobby;
153 INIT_LIST_HEAD(list);
154 return 1;
155 }
156 } else {
157 /* There are other associations in PD, so we only need to
158 * pull stuff out of the lobby that belongs to the
159 * association that is exiting PD (all of its notifications
160 * are posted here).
161 */
162 if (!skb_queue_empty(&sp->pd_lobby) && asoc) {
163 struct sk_buff *skb, *tmp;
164 struct sctp_ulpevent *event;
165
166 sctp_skb_for_each(skb, &sp->pd_lobby, tmp) {
167 event = sctp_skb2event(skb);
168 if (event->asoc == asoc) {
169 __skb_unlink(skb, &sp->pd_lobby);
170 __skb_queue_tail(&sk->sk_receive_queue,
171 skb);
172 }
173 }
174 }
152 } 175 }
176
153 return 0; 177 return 0;
154} 178}
155 179
@@ -157,7 +181,7 @@ int sctp_clear_pd(struct sock *sk)
157static int sctp_ulpq_clear_pd(struct sctp_ulpq *ulpq) 181static int sctp_ulpq_clear_pd(struct sctp_ulpq *ulpq)
158{ 182{
159 ulpq->pd_mode = 0; 183 ulpq->pd_mode = 0;
160 return sctp_clear_pd(ulpq->asoc->base.sk); 184 return sctp_clear_pd(ulpq->asoc->base.sk, ulpq->asoc);
161} 185}
162 186
163/* If the SKB of 'event' is on a list, it is the first such member 187/* If the SKB of 'event' is on a list, it is the first such member
@@ -187,25 +211,35 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
187 * the association the cause of the partial delivery. 211 * the association the cause of the partial delivery.
188 */ 212 */
189 213
190 if (!sctp_sk(sk)->pd_mode) { 214 if (atomic_read(&sctp_sk(sk)->pd_mode) == 0) {
191 queue = &sk->sk_receive_queue; 215 queue = &sk->sk_receive_queue;
192 } else if (ulpq->pd_mode) { 216 } else {
193 /* If the association is in partial delivery, we 217 if (ulpq->pd_mode) {
194 * need to finish delivering the partially processed 218 /* If the association is in partial delivery, we
195 * packet before passing any other data. This is 219 * need to finish delivering the partially processed
196 * because we don't truly support stream interleaving. 220 * packet before passing any other data. This is
197 */ 221 * because we don't truly support stream interleaving.
198 if ((event->msg_flags & MSG_NOTIFICATION) || 222 */
199 (SCTP_DATA_NOT_FRAG == 223 if ((event->msg_flags & MSG_NOTIFICATION) ||
200 (event->msg_flags & SCTP_DATA_FRAG_MASK))) 224 (SCTP_DATA_NOT_FRAG ==
201 queue = &sctp_sk(sk)->pd_lobby; 225 (event->msg_flags & SCTP_DATA_FRAG_MASK)))
202 else { 226 queue = &sctp_sk(sk)->pd_lobby;
203 clear_pd = event->msg_flags & MSG_EOR; 227 else {
204 queue = &sk->sk_receive_queue; 228 clear_pd = event->msg_flags & MSG_EOR;
229 queue = &sk->sk_receive_queue;
230 }
231 } else {
232 /*
233 * If fragment interleave is enabled, we
234 * can queue this to the recieve queue instead
235 * of the lobby.
236 */
237 if (sctp_sk(sk)->frag_interleave)
238 queue = &sk->sk_receive_queue;
239 else
240 queue = &sctp_sk(sk)->pd_lobby;
205 } 241 }
206 } else 242 }
207 queue = &sctp_sk(sk)->pd_lobby;
208
209 243
210 /* If we are harvesting multiple skbs they will be 244 /* If we are harvesting multiple skbs they will be
211 * collected on a list. 245 * collected on a list.
@@ -826,18 +860,29 @@ void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq,
826{ 860{
827 struct sctp_ulpevent *event; 861 struct sctp_ulpevent *event;
828 struct sctp_association *asoc; 862 struct sctp_association *asoc;
863 struct sctp_sock *sp;
829 864
830 asoc = ulpq->asoc; 865 asoc = ulpq->asoc;
866 sp = sctp_sk(asoc->base.sk);
831 867
832 /* Are we already in partial delivery mode? */ 868 /* If the association is already in Partial Delivery mode
833 if (!sctp_sk(asoc->base.sk)->pd_mode) { 869 * we have nothing to do.
870 */
871 if (ulpq->pd_mode)
872 return;
834 873
874 /* If the user enabled fragment interleave socket option,
875 * multiple associations can enter partial delivery.
876 * Otherwise, we can only enter partial delivery if the
878 * socket is not in partial delivery mode.
878 */
879 if (sp->frag_interleave || atomic_read(&sp->pd_mode) == 0) {
835 /* Is partial delivery possible? */ 880 /* Is partial delivery possible? */
836 event = sctp_ulpq_retrieve_first(ulpq); 881 event = sctp_ulpq_retrieve_first(ulpq);
837 /* Send event to the ULP. */ 882 /* Send event to the ULP. */
838 if (event) { 883 if (event) {
839 sctp_ulpq_tail_event(ulpq, event); 884 sctp_ulpq_tail_event(ulpq, event);
840 sctp_sk(asoc->base.sk)->pd_mode = 1; 885 atomic_inc(&sp->pd_mode);
841 ulpq->pd_mode = 1; 886 ulpq->pd_mode = 1;
842 return; 887 return;
843 } 888 }