aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTomasz Grobelny <tomasz@grobelny.oswiecenia.net>2008-09-04 01:30:19 -0400
committerGerrit Renker <gerrit@erg.abdn.ac.uk>2008-09-04 01:45:39 -0400
commitd6da3511d6b558d0b017777b61dc08b8fbc06ea4 (patch)
tree473f9131b9e641d803bfbea174cf1dfc45aea3ca
parentddab05568eaa70fc92b2aae957136f188f724e9c (diff)
dccp: Policy-based packet dequeueing infrastructure
This patch adds a generic infrastructure for policy-based dequeueing of TX packets and provides two policies: * a simple FIFO policy (which is the default) and * a priority based policy (set via socket options). Both policies honour the tx_qlen sysctl for the maximum size of the write queue (can be overridden via socket options). The priority policy uses skb->priority internally to assign an u32 priority identifier, using the same ranking as SO_PRIORITY. The skb->priority field is set to 0 when the packet leaves DCCP. The priority is supplied as ancillary data using cmsg(3), the patch also provides the requisite parsing routines. Signed-off-by: Tomasz Grobelny <tomasz@grobelny.oswiecenia.net> Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
-rw-r--r--Documentation/networking/dccp.txt19
-rw-r--r--include/linux/dccp.h21
-rw-r--r--net/dccp/Makefile2
-rw-r--r--net/dccp/dccp.h12
-rw-r--r--net/dccp/output.c7
-rw-r--r--net/dccp/proto.c67
-rw-r--r--net/dccp/qpolicy.c126
7 files changed, 246 insertions, 8 deletions
diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt
index b132e4a3cf0f..fcfc12534428 100644
--- a/Documentation/networking/dccp.txt
+++ b/Documentation/networking/dccp.txt
@@ -45,6 +45,25 @@ http://linux-net.osdl.org/index.php/DCCP_Testing#Experimental_DCCP_source_tree
45 45
46Socket options 46Socket options
47============== 47==============
48DCCP_SOCKOPT_QPOLICY_ID sets the dequeuing policy for outgoing packets. It takes
49a policy ID as argument and can only be set before the connection (i.e. changes
50during an established connection are not supported). Currently, two policies are
51defined: the "simple" policy (DCCPQ_POLICY_SIMPLE), which does nothing special,
52and a priority-based variant (DCCPQ_POLICY_PRIO). The latter allows passing a
53u32 priority value as ancillary data to sendmsg(), where higher numbers indicate
54a higher packet priority (similar to SO_PRIORITY). This ancillary data needs to
55be formatted using a cmsg(3) message header filled in as follows:
56 cmsg->cmsg_level = SOL_DCCP;
57 cmsg->cmsg_type = DCCP_SCM_PRIORITY;
58 cmsg->cmsg_len = CMSG_LEN(sizeof(uint32_t)); /* or CMSG_LEN(4) */
59
60DCCP_SOCKOPT_QPOLICY_TXQLEN sets the maximum length of the output queue. A zero
61value is always interpreted as unbounded queue length. If different from zero,
62the interpretation of this parameter depends on the current dequeuing policy
63(see above): the "simple" policy will enforce a fixed queue size by returning
64EAGAIN, whereas the "prio" policy enforces a fixed queue length by dropping the
65lowest-priority packet first. The default value for this parameter is
66initialised from /proc/sys/net/dccp/default/tx_qlen.
48 67
49DCCP_SOCKOPT_SERVICE sets the service. The specification mandates use of 68DCCP_SOCKOPT_SERVICE sets the service. The specification mandates use of
50service codes (RFC 4340, sec. 8.1.2); if this socket option is not set, 69service codes (RFC 4340, sec. 8.1.2); if this socket option is not set,
diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index eed52bcd35d0..010e2d87ed75 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -197,6 +197,21 @@ enum dccp_feature_numbers {
197 DCCPF_MAX_CCID_SPECIFIC = 255, 197 DCCPF_MAX_CCID_SPECIFIC = 255,
198}; 198};
199 199
/*
 * DCCP socket control message types, carried in cmsg_type of cmsg(3)
 * ancillary data whose cmsg_level is SOL_DCCP.
 */
enum dccp_cmsg_type {
	/* u32 queueing priority for the packet being sent */
	DCCP_SCM_PRIORITY    = 1,
	/* Types up to here are reserved exclusively for qpolicy parameters */
	DCCP_SCM_QPOLICY_MAX = 0xFFFF,
	/* Implicitly DCCP_SCM_QPOLICY_MAX + 1 */
	DCCP_SCM_MAX
};
207
/*
 * DCCP dequeueing policies for outgoing/queued packets
 * (the values accepted by the DCCP_SOCKOPT_QPOLICY_ID socket option)
 */
enum dccp_packet_dequeueing_policy {
	/* "simple" policy: nothing special, packets leave in arrival order */
	DCCPQ_POLICY_SIMPLE = 0,
	/* priority-based policy: highest skb->priority is dequeued first */
	DCCPQ_POLICY_PRIO,
	/* number of policies; IDs >= this value are rejected by setsockopt */
	DCCPQ_POLICY_MAX
};
214
200/* DCCP socket options */ 215/* DCCP socket options */
201#define DCCP_SOCKOPT_PACKET_SIZE 1 /* XXX deprecated, without effect */ 216#define DCCP_SOCKOPT_PACKET_SIZE 1 /* XXX deprecated, without effect */
202#define DCCP_SOCKOPT_SERVICE 2 217#define DCCP_SOCKOPT_SERVICE 2
@@ -210,6 +225,8 @@ enum dccp_feature_numbers {
210#define DCCP_SOCKOPT_CCID 13 225#define DCCP_SOCKOPT_CCID 13
211#define DCCP_SOCKOPT_TX_CCID 14 226#define DCCP_SOCKOPT_TX_CCID 14
212#define DCCP_SOCKOPT_RX_CCID 15 227#define DCCP_SOCKOPT_RX_CCID 15
228#define DCCP_SOCKOPT_QPOLICY_ID 16
229#define DCCP_SOCKOPT_QPOLICY_TXQLEN 17
213#define DCCP_SOCKOPT_CCID_RX_INFO 128 230#define DCCP_SOCKOPT_CCID_RX_INFO 128
214#define DCCP_SOCKOPT_CCID_TX_INFO 192 231#define DCCP_SOCKOPT_CCID_TX_INFO 192
215 232
@@ -458,6 +475,8 @@ struct dccp_ackvec;
458 * @dccps_hc_rx_ccid - CCID used for the receiver (or receiving half-connection) 475 * @dccps_hc_rx_ccid - CCID used for the receiver (or receiving half-connection)
459 * @dccps_hc_tx_ccid - CCID used for the sender (or sending half-connection) 476 * @dccps_hc_tx_ccid - CCID used for the sender (or sending half-connection)
460 * @dccps_options_received - parsed set of retrieved options 477 * @dccps_options_received - parsed set of retrieved options
478 * @dccps_qpolicy - TX dequeueing policy, one of %dccp_packet_dequeueing_policy
479 * @dccps_tx_qlen - maximum length of the TX queue
461 * @dccps_role - role of this sock, one of %dccp_role 480 * @dccps_role - role of this sock, one of %dccp_role
462 * @dccps_hc_rx_insert_options - receiver wants to add options when acking 481 * @dccps_hc_rx_insert_options - receiver wants to add options when acking
463 * @dccps_hc_tx_insert_options - sender wants to add options when sending 482 * @dccps_hc_tx_insert_options - sender wants to add options when sending
@@ -500,6 +519,8 @@ struct dccp_sock {
500 struct ccid *dccps_hc_rx_ccid; 519 struct ccid *dccps_hc_rx_ccid;
501 struct ccid *dccps_hc_tx_ccid; 520 struct ccid *dccps_hc_tx_ccid;
502 struct dccp_options_received dccps_options_received; 521 struct dccp_options_received dccps_options_received;
522 __u8 dccps_qpolicy;
523 __u32 dccps_tx_qlen;
503 enum dccp_role dccps_role:2; 524 enum dccp_role dccps_role:2;
504 __u8 dccps_hc_rx_insert_options:1; 525 __u8 dccps_hc_rx_insert_options:1;
505 __u8 dccps_hc_tx_insert_options:1; 526 __u8 dccps_hc_tx_insert_options:1;
diff --git a/net/dccp/Makefile b/net/dccp/Makefile
index b68440bd7fa2..0c1c9af2bf7e 100644
--- a/net/dccp/Makefile
+++ b/net/dccp/Makefile
@@ -1,7 +1,7 @@
1obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o 1obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o
2 2
3dccp-y := ccid.o feat.o input.o minisocks.o options.o \ 3dccp-y := ccid.o feat.o input.o minisocks.o options.o \
4 output.o proto.o timer.o ackvec.o 4 qpolicy.o output.o proto.o timer.o ackvec.o
5 5
6dccp_ipv4-y := ipv4.o 6dccp_ipv4-y := ipv4.o
7 7
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 74c90cd27677..ce2dd6f6f34d 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -234,6 +234,18 @@ extern void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
234extern void dccp_send_sync(struct sock *sk, const u64 seq, 234extern void dccp_send_sync(struct sock *sk, const u64 seq,
235 const enum dccp_pkt_type pkt_type); 235 const enum dccp_pkt_type pkt_type);
236 236
/*
 * 	TX Packet Dequeueing Interface
 *
 * Implemented in qpolicy.c; every function operates on sk->sk_write_queue
 * according to the dequeueing policy selected in dccp_sk(sk)->dccps_qpolicy.
 */

/* Admission: true if the policy refuses to accept another packet */
extern bool dccp_qpolicy_full(struct sock *sk);
/* Admission: hand a new @skb over to the policy for queueing */
extern void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb);

/* Dequeueing: peek at the packet the policy would send next (may be NULL) */
extern struct sk_buff *dccp_qpolicy_top(struct sock *sk);
/* Dequeueing: like dccp_qpolicy_top(), but also unlinks the packet */
extern struct sk_buff *dccp_qpolicy_pop(struct sock *sk);

/* Discard: unlink @skb from the write queue and free it (NULL is ignored) */
extern void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb);
245
246/*
247 * TX Packet Output and TX Timers
248 */
237extern void dccp_write_xmit(struct sock *sk); 249extern void dccp_write_xmit(struct sock *sk);
238extern void dccp_write_space(struct sock *sk); 250extern void dccp_write_space(struct sock *sk);
239extern void dccp_flush_write_queue(struct sock *sk, long *time_budget); 251extern void dccp_flush_write_queue(struct sock *sk, long *time_budget);
diff --git a/net/dccp/output.c b/net/dccp/output.c
index b1eaf7bcfb11..2532797a8009 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -241,7 +241,7 @@ static void dccp_xmit_packet(struct sock *sk)
241{ 241{
242 int err, len; 242 int err, len;
243 struct dccp_sock *dp = dccp_sk(sk); 243 struct dccp_sock *dp = dccp_sk(sk);
244 struct sk_buff *skb = skb_dequeue(&sk->sk_write_queue); 244 struct sk_buff *skb = dccp_qpolicy_pop(sk);
245 245
246 if (unlikely(skb == NULL)) 246 if (unlikely(skb == NULL))
247 return; 247 return;
@@ -344,7 +344,7 @@ void dccp_write_xmit(struct sock *sk)
344 struct dccp_sock *dp = dccp_sk(sk); 344 struct dccp_sock *dp = dccp_sk(sk);
345 struct sk_buff *skb; 345 struct sk_buff *skb;
346 346
347 while ((skb = skb_peek(&sk->sk_write_queue))) { 347 while ((skb = dccp_qpolicy_top(sk))) {
348 int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); 348 int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
349 349
350 switch (ccid_packet_dequeue_eval(rc)) { 350 switch (ccid_packet_dequeue_eval(rc)) {
@@ -358,8 +358,7 @@ void dccp_write_xmit(struct sock *sk)
358 dccp_xmit_packet(sk); 358 dccp_xmit_packet(sk);
359 break; 359 break;
360 case CCID_PACKET_ERR: 360 case CCID_PACKET_ERR:
361 skb_dequeue(&sk->sk_write_queue); 361 dccp_qpolicy_drop(sk, skb);
362 kfree_skb(skb);
363 dccp_pr_debug("packet discarded due to err=%d\n", rc); 362 dccp_pr_debug("packet discarded due to err=%d\n", rc);
364 } 363 }
365 } 364 }
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 8c125ffab1c5..b56efdd2a421 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -189,6 +189,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
189 dp->dccps_rate_last = jiffies; 189 dp->dccps_rate_last = jiffies;
190 dp->dccps_role = DCCP_ROLE_UNDEFINED; 190 dp->dccps_role = DCCP_ROLE_UNDEFINED;
191 dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT; 191 dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT;
192 dp->dccps_tx_qlen = sysctl_dccp_tx_qlen;
192 193
193 dccp_init_xmit_timers(sk); 194 dccp_init_xmit_timers(sk);
194 195
@@ -541,6 +542,20 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
541 case DCCP_SOCKOPT_RECV_CSCOV: 542 case DCCP_SOCKOPT_RECV_CSCOV:
542 err = dccp_setsockopt_cscov(sk, val, true); 543 err = dccp_setsockopt_cscov(sk, val, true);
543 break; 544 break;
545 case DCCP_SOCKOPT_QPOLICY_ID:
546 if (sk->sk_state != DCCP_CLOSED)
547 err = -EISCONN;
548 else if (val < 0 || val >= DCCPQ_POLICY_MAX)
549 err = -EINVAL;
550 else
551 dp->dccps_qpolicy = val;
552 break;
553 case DCCP_SOCKOPT_QPOLICY_TXQLEN:
554 if (val < 0)
555 err = -EINVAL;
556 else
557 dp->dccps_tx_qlen = val;
558 break;
544 default: 559 default:
545 err = -ENOPROTOOPT; 560 err = -ENOPROTOOPT;
546 break; 561 break;
@@ -648,6 +663,12 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
648 case DCCP_SOCKOPT_RECV_CSCOV: 663 case DCCP_SOCKOPT_RECV_CSCOV:
649 val = dp->dccps_pcrlen; 664 val = dp->dccps_pcrlen;
650 break; 665 break;
666 case DCCP_SOCKOPT_QPOLICY_ID:
667 val = dp->dccps_qpolicy;
668 break;
669 case DCCP_SOCKOPT_QPOLICY_TXQLEN:
670 val = dp->dccps_tx_qlen;
671 break;
651 case 128 ... 191: 672 case 128 ... 191:
652 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname, 673 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
653 len, (u32 __user *)optval, optlen); 674 len, (u32 __user *)optval, optlen);
@@ -690,6 +711,43 @@ int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
690EXPORT_SYMBOL_GPL(compat_dccp_getsockopt); 711EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
691#endif 712#endif
692 713
714static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
715{
716 struct cmsghdr *cmsg = CMSG_FIRSTHDR(msg);
717
718 /*
719 * Assign an (opaque) qpolicy priority value to skb->priority.
720 *
721 * We are overloading this skb field for use with the qpolicy subystem.
722 * The skb->priority is normally used for the SO_PRIORITY option, which
723 * is initialised from sk_priority. Since the assignment of sk_priority
724 * to skb->priority happens later (on layer 3), we overload this field
725 * for use with queueing priorities as long as the skb is on layer 4.
726 * The default priority value (if nothing is set) is 0.
727 */
728 skb->priority = 0;
729
730 for (; cmsg != NULL; cmsg = CMSG_NXTHDR(msg, cmsg)) {
731
732 if (!CMSG_OK(msg, cmsg))
733 return -EINVAL;
734
735 if (cmsg->cmsg_level != SOL_DCCP)
736 continue;
737
738 switch (cmsg->cmsg_type) {
739 case DCCP_SCM_PRIORITY:
740 if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
741 return -EINVAL;
742 skb->priority = *(__u32 *)CMSG_DATA(cmsg);
743 break;
744 default:
745 return -EINVAL;
746 }
747 }
748 return 0;
749}
750
693int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, 751int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
694 size_t len) 752 size_t len)
695{ 753{
@@ -705,8 +763,7 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
705 763
706 lock_sock(sk); 764 lock_sock(sk);
707 765
708 if (sysctl_dccp_tx_qlen && 766 if (dccp_qpolicy_full(sk)) {
709 (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
710 rc = -EAGAIN; 767 rc = -EAGAIN;
711 goto out_release; 768 goto out_release;
712 } 769 }
@@ -734,7 +791,11 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
734 if (rc != 0) 791 if (rc != 0)
735 goto out_discard; 792 goto out_discard;
736 793
737 skb_queue_tail(&sk->sk_write_queue, skb); 794 rc = dccp_msghdr_parse(msg, skb);
795 if (rc != 0)
796 goto out_discard;
797
798 dccp_qpolicy_push(sk, skb);
738 dccp_write_xmit(sk); 799 dccp_write_xmit(sk);
739out_release: 800out_release:
740 release_sock(sk); 801 release_sock(sk);
diff --git a/net/dccp/qpolicy.c b/net/dccp/qpolicy.c
new file mode 100644
index 000000000000..414696b0d830
--- /dev/null
+++ b/net/dccp/qpolicy.c
@@ -0,0 +1,126 @@
1/*
2 * net/dccp/qpolicy.c
3 *
4 * Policy-based packet dequeueing interface for DCCP.
5 *
6 * Copyright (c) 2008 Tomasz Grobelny <tomasz@grobelny.oswiecenia.net>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License v2
10 * as published by the Free Software Foundation.
11 */
12#include "dccp.h"
13
14/*
15 * Simple Dequeueing Policy:
16 * If tx_qlen is different from 0, enqueue up to tx_qlen elements.
17 */
18static void qpolicy_simple_push(struct sock *sk, struct sk_buff *skb)
19{
20 skb_queue_tail(&sk->sk_write_queue, skb);
21}
22
23static bool qpolicy_simple_full(struct sock *sk)
24{
25 return dccp_sk(sk)->dccps_tx_qlen &&
26 sk->sk_write_queue.qlen >= dccp_sk(sk)->dccps_tx_qlen;
27}
28
29static struct sk_buff *qpolicy_simple_top(struct sock *sk)
30{
31 return skb_peek(&sk->sk_write_queue);
32}
33
34/*
35 * Priority-based Dequeueing Policy:
36 * If tx_qlen is different from 0 and the queue has reached its upper bound
37 * of tx_qlen elements, replace older packets lowest-priority-first.
38 */
39static struct sk_buff *qpolicy_prio_best_skb(struct sock *sk)
40{
41 struct sk_buff *skb, *best = NULL;
42
43 skb_queue_walk(&sk->sk_write_queue, skb)
44 if (best == NULL || skb->priority > best->priority)
45 best = skb;
46 return best;
47}
48
49static struct sk_buff *qpolicy_prio_worst_skb(struct sock *sk)
50{
51 struct sk_buff *skb, *worst = NULL;
52
53 skb_queue_walk(&sk->sk_write_queue, skb)
54 if (worst == NULL || skb->priority < worst->priority)
55 worst = skb;
56 return worst;
57}
58
/*
 * The priority policy never refuses a packet: when the queue has reached
 * its limit, room is made by dropping the lowest-priority queued packet.
 * Always returns false.
 */
static bool qpolicy_prio_full(struct sock *sk)
{
	struct sk_buff *victim;

	if (qpolicy_simple_full(sk)) {
		victim = qpolicy_prio_worst_skb(sk);
		dccp_qpolicy_drop(sk, victim);
	}
	return false;
}
65
66/**
67 * struct dccp_qpolicy_operations - TX Packet Dequeueing Interface
68 * @push: add a new @skb to the write queue
69 * @full: indicates that no more packets will be admitted
70 * @top: peeks at whatever the queueing policy defines as its `top'
71 */
72static struct dccp_qpolicy_operations {
73 void (*push) (struct sock *sk, struct sk_buff *skb);
74 bool (*full) (struct sock *sk);
75 struct sk_buff* (*top) (struct sock *sk);
76
77} qpol_table[DCCPQ_POLICY_MAX] = {
78 [DCCPQ_POLICY_SIMPLE] = {
79 .push = qpolicy_simple_push,
80 .full = qpolicy_simple_full,
81 .top = qpolicy_simple_top,
82 },
83 [DCCPQ_POLICY_PRIO] = {
84 .push = qpolicy_simple_push,
85 .full = qpolicy_prio_full,
86 .top = qpolicy_prio_best_skb,
87 },
88};
89
90/*
91 * Externally visible interface
92 */
93void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb)
94{
95 qpol_table[dccp_sk(sk)->dccps_qpolicy].push(sk, skb);
96}
97
98bool dccp_qpolicy_full(struct sock *sk)
99{
100 return qpol_table[dccp_sk(sk)->dccps_qpolicy].full(sk);
101}
102
103void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb)
104{
105 if (skb != NULL) {
106 skb_unlink(skb, &sk->sk_write_queue);
107 kfree_skb(skb);
108 }
109}
110
111struct sk_buff *dccp_qpolicy_top(struct sock *sk)
112{
113 return qpol_table[dccp_sk(sk)->dccps_qpolicy].top(sk);
114}
115
116struct sk_buff *dccp_qpolicy_pop(struct sock *sk)
117{
118 struct sk_buff *skb = dccp_qpolicy_top(sk);
119
120 /* Clear any skb fields that we used internally */
121 skb->priority = 0;
122
123 if (skb)
124 skb_unlink(skb, &sk->sk_write_queue);
125 return skb;
126}