aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTomasz Grobelny <tomasz@grobelny.oswiecenia.net>2010-12-04 07:38:01 -0500
committerGerrit Renker <gerrit@erg.abdn.ac.uk>2010-12-07 07:47:12 -0500
commit871a2c16c21b988688b4ab1a78eadd969765c0a3 (patch)
tree34ffb3be1402747ef3b7fdb754fb99778bd45728
parentcfa969e385a23e4c85f50e0ed5de25a2e18bf9d4 (diff)
dccp: Policy-based packet dequeueing infrastructure
This patch adds a generic infrastructure for policy-based dequeueing of TX packets and provides two policies: * a simple FIFO policy (which is the default) and * a priority based policy (set via socket options). Both policies honour the tx_qlen sysctl for the maximum size of the write queue (can be overridden via socket options). The priority policy uses skb->priority internally to assign an u32 priority identifier, using the same ranking as SO_PRIORITY. The skb->priority field is set to 0 when the packet leaves DCCP. The priority is supplied as ancillary data using cmsg(3), the patch also provides the requisite parsing routines. Signed-off-by: Tomasz Grobelny <tomasz@grobelny.oswiecenia.net> Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
-rw-r--r--Documentation/networking/dccp.txt20
-rw-r--r--include/linux/dccp.h21
-rw-r--r--net/dccp/Makefile4
-rw-r--r--net/dccp/dccp.h12
-rw-r--r--net/dccp/output.c7
-rw-r--r--net/dccp/proto.c67
-rw-r--r--net/dccp/qpolicy.c126
7 files changed, 248 insertions, 9 deletions
diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt
index 271d524a4c8d..b395ca6a49f2 100644
--- a/Documentation/networking/dccp.txt
+++ b/Documentation/networking/dccp.txt
@@ -47,6 +47,26 @@ http://linux-net.osdl.org/index.php/DCCP_Testing#Experimental_DCCP_source_tree
47 47
48Socket options 48Socket options
49============== 49==============
50DCCP_SOCKOPT_QPOLICY_ID sets the dequeuing policy for outgoing packets. It takes
51a policy ID as argument and can only be set before the connection (i.e. changes
52during an established connection are not supported). Currently, two policies are
53defined: the "simple" policy (DCCPQ_POLICY_SIMPLE), which does nothing special,
54and a priority-based variant (DCCPQ_POLICY_PRIO). The latter allows to pass an
55u32 priority value as ancillary data to sendmsg(), where higher numbers indicate
56a higher packet priority (similar to SO_PRIORITY). This ancillary data needs to
57be formatted using a cmsg(3) message header filled in as follows:
58 cmsg->cmsg_level = SOL_DCCP;
59 cmsg->cmsg_type = DCCP_SCM_PRIORITY;
60 cmsg->cmsg_len = CMSG_LEN(sizeof(uint32_t)); /* or CMSG_LEN(4) */
61
62DCCP_SOCKOPT_QPOLICY_TXQLEN sets the maximum length of the output queue. A zero
63value is always interpreted as unbounded queue length. If different from zero,
64the interpretation of this parameter depends on the current dequeuing policy
65(see above): the "simple" policy will enforce a fixed queue size by returning
66EAGAIN, whereas the "prio" policy enforces a fixed queue length by dropping the
67lowest-priority packet first. The default value for this parameter is
68initialised from /proc/sys/net/dccp/default/tx_qlen.
69
50DCCP_SOCKOPT_SERVICE sets the service. The specification mandates use of 70DCCP_SOCKOPT_SERVICE sets the service. The specification mandates use of
51service codes (RFC 4340, sec. 8.1.2); if this socket option is not set, 71service codes (RFC 4340, sec. 8.1.2); if this socket option is not set,
52the socket will fall back to 0 (which means that no meaningful service code 72the socket will fall back to 0 (which means that no meaningful service code
diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index eed52bcd35d0..010e2d87ed75 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -197,6 +197,21 @@ enum dccp_feature_numbers {
197 DCCPF_MAX_CCID_SPECIFIC = 255, 197 DCCPF_MAX_CCID_SPECIFIC = 255,
198}; 198};
199 199
200/* DCCP socket control message types for cmsg */
201enum dccp_cmsg_type {
202 DCCP_SCM_PRIORITY = 1,
203 DCCP_SCM_QPOLICY_MAX = 0xFFFF,
204 /* ^-- Up to here reserved exclusively for qpolicy parameters */
205 DCCP_SCM_MAX
206};
207
208/* DCCP priorities for outgoing/queued packets */
209enum dccp_packet_dequeueing_policy {
210 DCCPQ_POLICY_SIMPLE,
211 DCCPQ_POLICY_PRIO,
212 DCCPQ_POLICY_MAX
213};
214
200/* DCCP socket options */ 215/* DCCP socket options */
201#define DCCP_SOCKOPT_PACKET_SIZE 1 /* XXX deprecated, without effect */ 216#define DCCP_SOCKOPT_PACKET_SIZE 1 /* XXX deprecated, without effect */
202#define DCCP_SOCKOPT_SERVICE 2 217#define DCCP_SOCKOPT_SERVICE 2
@@ -210,6 +225,8 @@ enum dccp_feature_numbers {
210#define DCCP_SOCKOPT_CCID 13 225#define DCCP_SOCKOPT_CCID 13
211#define DCCP_SOCKOPT_TX_CCID 14 226#define DCCP_SOCKOPT_TX_CCID 14
212#define DCCP_SOCKOPT_RX_CCID 15 227#define DCCP_SOCKOPT_RX_CCID 15
228#define DCCP_SOCKOPT_QPOLICY_ID 16
229#define DCCP_SOCKOPT_QPOLICY_TXQLEN 17
213#define DCCP_SOCKOPT_CCID_RX_INFO 128 230#define DCCP_SOCKOPT_CCID_RX_INFO 128
214#define DCCP_SOCKOPT_CCID_TX_INFO 192 231#define DCCP_SOCKOPT_CCID_TX_INFO 192
215 232
@@ -458,6 +475,8 @@ struct dccp_ackvec;
458 * @dccps_hc_rx_ccid - CCID used for the receiver (or receiving half-connection) 475 * @dccps_hc_rx_ccid - CCID used for the receiver (or receiving half-connection)
459 * @dccps_hc_tx_ccid - CCID used for the sender (or sending half-connection) 476 * @dccps_hc_tx_ccid - CCID used for the sender (or sending half-connection)
460 * @dccps_options_received - parsed set of retrieved options 477 * @dccps_options_received - parsed set of retrieved options
478 * @dccps_qpolicy - TX dequeueing policy, one of %dccp_packet_dequeueing_policy
479 * @dccps_tx_qlen - maximum length of the TX queue
461 * @dccps_role - role of this sock, one of %dccp_role 480 * @dccps_role - role of this sock, one of %dccp_role
462 * @dccps_hc_rx_insert_options - receiver wants to add options when acking 481 * @dccps_hc_rx_insert_options - receiver wants to add options when acking
463 * @dccps_hc_tx_insert_options - sender wants to add options when sending 482 * @dccps_hc_tx_insert_options - sender wants to add options when sending
@@ -500,6 +519,8 @@ struct dccp_sock {
500 struct ccid *dccps_hc_rx_ccid; 519 struct ccid *dccps_hc_rx_ccid;
501 struct ccid *dccps_hc_tx_ccid; 520 struct ccid *dccps_hc_tx_ccid;
502 struct dccp_options_received dccps_options_received; 521 struct dccp_options_received dccps_options_received;
522 __u8 dccps_qpolicy;
523 __u32 dccps_tx_qlen;
503 enum dccp_role dccps_role:2; 524 enum dccp_role dccps_role:2;
504 __u8 dccps_hc_rx_insert_options:1; 525 __u8 dccps_hc_rx_insert_options:1;
505 __u8 dccps_hc_tx_insert_options:1; 526 __u8 dccps_hc_tx_insert_options:1;
diff --git a/net/dccp/Makefile b/net/dccp/Makefile
index 2991efcc8dea..5c8362b037ed 100644
--- a/net/dccp/Makefile
+++ b/net/dccp/Makefile
@@ -1,7 +1,7 @@
1obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o 1obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o
2 2
3dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o 3dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o \
4 4 qpolicy.o
5# 5#
6# CCID algorithms to be used by dccp.ko 6# CCID algorithms to be used by dccp.ko
7# 7#
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 19fafd597465..d008da91cec2 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -243,6 +243,18 @@ extern void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
243extern void dccp_send_sync(struct sock *sk, const u64 seq, 243extern void dccp_send_sync(struct sock *sk, const u64 seq,
244 const enum dccp_pkt_type pkt_type); 244 const enum dccp_pkt_type pkt_type);
245 245
246/*
247 * TX Packet Dequeueing Interface
248 */
249extern void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb);
250extern bool dccp_qpolicy_full(struct sock *sk);
251extern void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb);
252extern struct sk_buff *dccp_qpolicy_top(struct sock *sk);
253extern struct sk_buff *dccp_qpolicy_pop(struct sock *sk);
254
255/*
256 * TX Packet Output and TX Timers
257 */
246extern void dccp_write_xmit(struct sock *sk); 258extern void dccp_write_xmit(struct sock *sk);
247extern void dccp_write_space(struct sock *sk); 259extern void dccp_write_space(struct sock *sk);
248extern void dccp_flush_write_queue(struct sock *sk, long *time_budget); 260extern void dccp_flush_write_queue(struct sock *sk, long *time_budget);
diff --git a/net/dccp/output.c b/net/dccp/output.c
index d96dd9d362ae..784d30210543 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -242,7 +242,7 @@ static void dccp_xmit_packet(struct sock *sk)
242{ 242{
243 int err, len; 243 int err, len;
244 struct dccp_sock *dp = dccp_sk(sk); 244 struct dccp_sock *dp = dccp_sk(sk);
245 struct sk_buff *skb = skb_dequeue(&sk->sk_write_queue); 245 struct sk_buff *skb = dccp_qpolicy_pop(sk);
246 246
247 if (unlikely(skb == NULL)) 247 if (unlikely(skb == NULL))
248 return; 248 return;
@@ -345,7 +345,7 @@ void dccp_write_xmit(struct sock *sk)
345 struct dccp_sock *dp = dccp_sk(sk); 345 struct dccp_sock *dp = dccp_sk(sk);
346 struct sk_buff *skb; 346 struct sk_buff *skb;
347 347
348 while ((skb = skb_peek(&sk->sk_write_queue))) { 348 while ((skb = dccp_qpolicy_top(sk))) {
349 int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); 349 int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
350 350
351 switch (ccid_packet_dequeue_eval(rc)) { 351 switch (ccid_packet_dequeue_eval(rc)) {
@@ -359,8 +359,7 @@ void dccp_write_xmit(struct sock *sk)
359 dccp_xmit_packet(sk); 359 dccp_xmit_packet(sk);
360 break; 360 break;
361 case CCID_PACKET_ERR: 361 case CCID_PACKET_ERR:
362 skb_dequeue(&sk->sk_write_queue); 362 dccp_qpolicy_drop(sk, skb);
363 kfree_skb(skb);
364 dccp_pr_debug("packet discarded due to err=%d\n", rc); 363 dccp_pr_debug("packet discarded due to err=%d\n", rc);
365 } 364 }
366 } 365 }
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index ef343d53fcea..d6a224982bb5 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -185,6 +185,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
185 dp->dccps_role = DCCP_ROLE_UNDEFINED; 185 dp->dccps_role = DCCP_ROLE_UNDEFINED;
186 dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT; 186 dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT;
187 dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1; 187 dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1;
188 dp->dccps_tx_qlen = sysctl_dccp_tx_qlen;
188 189
189 dccp_init_xmit_timers(sk); 190 dccp_init_xmit_timers(sk);
190 191
@@ -532,6 +533,20 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
532 case DCCP_SOCKOPT_RECV_CSCOV: 533 case DCCP_SOCKOPT_RECV_CSCOV:
533 err = dccp_setsockopt_cscov(sk, val, true); 534 err = dccp_setsockopt_cscov(sk, val, true);
534 break; 535 break;
536 case DCCP_SOCKOPT_QPOLICY_ID:
537 if (sk->sk_state != DCCP_CLOSED)
538 err = -EISCONN;
539 else if (val < 0 || val >= DCCPQ_POLICY_MAX)
540 err = -EINVAL;
541 else
542 dp->dccps_qpolicy = val;
543 break;
544 case DCCP_SOCKOPT_QPOLICY_TXQLEN:
545 if (val < 0)
546 err = -EINVAL;
547 else
548 dp->dccps_tx_qlen = val;
549 break;
535 default: 550 default:
536 err = -ENOPROTOOPT; 551 err = -ENOPROTOOPT;
537 break; 552 break;
@@ -639,6 +654,12 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
639 case DCCP_SOCKOPT_RECV_CSCOV: 654 case DCCP_SOCKOPT_RECV_CSCOV:
640 val = dp->dccps_pcrlen; 655 val = dp->dccps_pcrlen;
641 break; 656 break;
657 case DCCP_SOCKOPT_QPOLICY_ID:
658 val = dp->dccps_qpolicy;
659 break;
660 case DCCP_SOCKOPT_QPOLICY_TXQLEN:
661 val = dp->dccps_tx_qlen;
662 break;
642 case 128 ... 191: 663 case 128 ... 191:
643 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname, 664 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
644 len, (u32 __user *)optval, optlen); 665 len, (u32 __user *)optval, optlen);
@@ -681,6 +702,43 @@ int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
681EXPORT_SYMBOL_GPL(compat_dccp_getsockopt); 702EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
682#endif 703#endif
683 704
705static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
706{
707 struct cmsghdr *cmsg = CMSG_FIRSTHDR(msg);
708
709 /*
710 * Assign an (opaque) qpolicy priority value to skb->priority.
711 *
712 * We are overloading this skb field for use with the qpolicy subystem.
713 * The skb->priority is normally used for the SO_PRIORITY option, which
714 * is initialised from sk_priority. Since the assignment of sk_priority
715 * to skb->priority happens later (on layer 3), we overload this field
716 * for use with queueing priorities as long as the skb is on layer 4.
717 * The default priority value (if nothing is set) is 0.
718 */
719 skb->priority = 0;
720
721 for (; cmsg != NULL; cmsg = CMSG_NXTHDR(msg, cmsg)) {
722
723 if (!CMSG_OK(msg, cmsg))
724 return -EINVAL;
725
726 if (cmsg->cmsg_level != SOL_DCCP)
727 continue;
728
729 switch (cmsg->cmsg_type) {
730 case DCCP_SCM_PRIORITY:
731 if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
732 return -EINVAL;
733 skb->priority = *(__u32 *)CMSG_DATA(cmsg);
734 break;
735 default:
736 return -EINVAL;
737 }
738 }
739 return 0;
740}
741
684int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, 742int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
685 size_t len) 743 size_t len)
686{ 744{
@@ -696,8 +754,7 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
696 754
697 lock_sock(sk); 755 lock_sock(sk);
698 756
699 if (sysctl_dccp_tx_qlen && 757 if (dccp_qpolicy_full(sk)) {
700 (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
701 rc = -EAGAIN; 758 rc = -EAGAIN;
702 goto out_release; 759 goto out_release;
703 } 760 }
@@ -725,7 +782,11 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
725 if (rc != 0) 782 if (rc != 0)
726 goto out_discard; 783 goto out_discard;
727 784
728 skb_queue_tail(&sk->sk_write_queue, skb); 785 rc = dccp_msghdr_parse(msg, skb);
786 if (rc != 0)
787 goto out_discard;
788
789 dccp_qpolicy_push(sk, skb);
729 /* 790 /*
730 * The xmit_timer is set if the TX CCID is rate-based and will expire 791 * The xmit_timer is set if the TX CCID is rate-based and will expire
731 * when congestion control permits to release further packets into the 792 * when congestion control permits to release further packets into the
diff --git a/net/dccp/qpolicy.c b/net/dccp/qpolicy.c
new file mode 100644
index 000000000000..4b0fd6b11f6d
--- /dev/null
+++ b/net/dccp/qpolicy.c
@@ -0,0 +1,126 @@
1/*
2 * net/dccp/qpolicy.c
3 *
4 * Policy-based packet dequeueing interface for DCCP.
5 *
6 * Copyright (c) 2008 Tomasz Grobelny <tomasz@grobelny.oswiecenia.net>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License v2
10 * as published by the Free Software Foundation.
11 */
12#include "dccp.h"
13
14/*
15 * Simple Dequeueing Policy:
16 * If tx_qlen is different from 0, enqueue up to tx_qlen elements.
17 */
18static void qpolicy_simple_push(struct sock *sk, struct sk_buff *skb)
19{
20 skb_queue_tail(&sk->sk_write_queue, skb);
21}
22
23static bool qpolicy_simple_full(struct sock *sk)
24{
25 return dccp_sk(sk)->dccps_tx_qlen &&
26 sk->sk_write_queue.qlen >= dccp_sk(sk)->dccps_tx_qlen;
27}
28
29static struct sk_buff *qpolicy_simple_top(struct sock *sk)
30{
31 return skb_peek(&sk->sk_write_queue);
32}
33
34/*
35 * Priority-based Dequeueing Policy:
36 * If tx_qlen is different from 0 and the queue has reached its upper bound
37 * of tx_qlen elements, replace older packets lowest-priority-first.
38 */
39static struct sk_buff *qpolicy_prio_best_skb(struct sock *sk)
40{
41 struct sk_buff *skb, *best = NULL;
42
43 skb_queue_walk(&sk->sk_write_queue, skb)
44 if (best == NULL || skb->priority > best->priority)
45 best = skb;
46 return best;
47}
48
49static struct sk_buff *qpolicy_prio_worst_skb(struct sock *sk)
50{
51 struct sk_buff *skb, *worst = NULL;
52
53 skb_queue_walk(&sk->sk_write_queue, skb)
54 if (worst == NULL || skb->priority < worst->priority)
55 worst = skb;
56 return worst;
57}
58
59static bool qpolicy_prio_full(struct sock *sk)
60{
61 if (qpolicy_simple_full(sk))
62 dccp_qpolicy_drop(sk, qpolicy_prio_worst_skb(sk));
63 return false;
64}
65
66/**
67 * struct dccp_qpolicy_operations - TX Packet Dequeueing Interface
68 * @push: add a new @skb to the write queue
69 * @full: indicates that no more packets will be admitted
70 * @top: peeks at whatever the queueing policy defines as its `top'
71 */
72static struct dccp_qpolicy_operations {
73 void (*push) (struct sock *sk, struct sk_buff *skb);
74 bool (*full) (struct sock *sk);
75 struct sk_buff* (*top) (struct sock *sk);
76
77} qpol_table[DCCPQ_POLICY_MAX] = {
78 [DCCPQ_POLICY_SIMPLE] = {
79 .push = qpolicy_simple_push,
80 .full = qpolicy_simple_full,
81 .top = qpolicy_simple_top,
82 },
83 [DCCPQ_POLICY_PRIO] = {
84 .push = qpolicy_simple_push,
85 .full = qpolicy_prio_full,
86 .top = qpolicy_prio_best_skb,
87 },
88};
89
90/*
91 * Externally visible interface
92 */
93void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb)
94{
95 qpol_table[dccp_sk(sk)->dccps_qpolicy].push(sk, skb);
96}
97
98bool dccp_qpolicy_full(struct sock *sk)
99{
100 return qpol_table[dccp_sk(sk)->dccps_qpolicy].full(sk);
101}
102
103void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb)
104{
105 if (skb != NULL) {
106 skb_unlink(skb, &sk->sk_write_queue);
107 kfree_skb(skb);
108 }
109}
110
111struct sk_buff *dccp_qpolicy_top(struct sock *sk)
112{
113 return qpol_table[dccp_sk(sk)->dccps_qpolicy].top(sk);
114}
115
116struct sk_buff *dccp_qpolicy_pop(struct sock *sk)
117{
118 struct sk_buff *skb = dccp_qpolicy_top(sk);
119
120 if (skb != NULL) {
121 /* Clear any skb fields that we used internally */
122 skb->priority = 0;
123 skb_unlink(skb, &sk->sk_write_queue);
124 }
125 return skb;
126}