aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorTomasz Grobelny <tomasz@grobelny.oswiecenia.net>2010-12-04 07:38:01 -0500
committerGerrit Renker <gerrit@erg.abdn.ac.uk>2010-12-07 07:47:12 -0500
commit871a2c16c21b988688b4ab1a78eadd969765c0a3 (patch)
tree34ffb3be1402747ef3b7fdb754fb99778bd45728 /net
parentcfa969e385a23e4c85f50e0ed5de25a2e18bf9d4 (diff)
dccp: Policy-based packet dequeueing infrastructure
This patch adds a generic infrastructure for policy-based dequeueing of TX packets and provides two policies: * a simple FIFO policy (which is the default) and * a priority based policy (set via socket options). Both policies honour the tx_qlen sysctl for the maximum size of the write queue (can be overridden via socket options). The priority policy uses skb->priority internally to assign a u32 priority identifier, using the same ranking as SO_PRIORITY. The skb->priority field is set to 0 when the packet leaves DCCP. The priority is supplied as ancillary data using cmsg(3); the patch also provides the requisite parsing routines. Signed-off-by: Tomasz Grobelny <tomasz@grobelny.oswiecenia.net> Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Diffstat (limited to 'net')
-rw-r--r--net/dccp/Makefile4
-rw-r--r--net/dccp/dccp.h12
-rw-r--r--net/dccp/output.c7
-rw-r--r--net/dccp/proto.c67
-rw-r--r--net/dccp/qpolicy.c126
5 files changed, 207 insertions, 9 deletions
diff --git a/net/dccp/Makefile b/net/dccp/Makefile
index 2991efcc8dea..5c8362b037ed 100644
--- a/net/dccp/Makefile
+++ b/net/dccp/Makefile
@@ -1,7 +1,7 @@
1obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o 1obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o
2 2
3dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o 3dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o \
4 4 qpolicy.o
5# 5#
6# CCID algorithms to be used by dccp.ko 6# CCID algorithms to be used by dccp.ko
7# 7#
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 19fafd597465..d008da91cec2 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -243,6 +243,18 @@ extern void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
243extern void dccp_send_sync(struct sock *sk, const u64 seq, 243extern void dccp_send_sync(struct sock *sk, const u64 seq,
244 const enum dccp_pkt_type pkt_type); 244 const enum dccp_pkt_type pkt_type);
245 245
246/*
247 * TX Packet Dequeueing Interface
248 */
249extern void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb);
250extern bool dccp_qpolicy_full(struct sock *sk);
251extern void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb);
252extern struct sk_buff *dccp_qpolicy_top(struct sock *sk);
253extern struct sk_buff *dccp_qpolicy_pop(struct sock *sk);
254
255/*
256 * TX Packet Output and TX Timers
257 */
246extern void dccp_write_xmit(struct sock *sk); 258extern void dccp_write_xmit(struct sock *sk);
247extern void dccp_write_space(struct sock *sk); 259extern void dccp_write_space(struct sock *sk);
248extern void dccp_flush_write_queue(struct sock *sk, long *time_budget); 260extern void dccp_flush_write_queue(struct sock *sk, long *time_budget);
diff --git a/net/dccp/output.c b/net/dccp/output.c
index d96dd9d362ae..784d30210543 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -242,7 +242,7 @@ static void dccp_xmit_packet(struct sock *sk)
242{ 242{
243 int err, len; 243 int err, len;
244 struct dccp_sock *dp = dccp_sk(sk); 244 struct dccp_sock *dp = dccp_sk(sk);
245 struct sk_buff *skb = skb_dequeue(&sk->sk_write_queue); 245 struct sk_buff *skb = dccp_qpolicy_pop(sk);
246 246
247 if (unlikely(skb == NULL)) 247 if (unlikely(skb == NULL))
248 return; 248 return;
@@ -345,7 +345,7 @@ void dccp_write_xmit(struct sock *sk)
345 struct dccp_sock *dp = dccp_sk(sk); 345 struct dccp_sock *dp = dccp_sk(sk);
346 struct sk_buff *skb; 346 struct sk_buff *skb;
347 347
348 while ((skb = skb_peek(&sk->sk_write_queue))) { 348 while ((skb = dccp_qpolicy_top(sk))) {
349 int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); 349 int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
350 350
351 switch (ccid_packet_dequeue_eval(rc)) { 351 switch (ccid_packet_dequeue_eval(rc)) {
@@ -359,8 +359,7 @@ void dccp_write_xmit(struct sock *sk)
359 dccp_xmit_packet(sk); 359 dccp_xmit_packet(sk);
360 break; 360 break;
361 case CCID_PACKET_ERR: 361 case CCID_PACKET_ERR:
362 skb_dequeue(&sk->sk_write_queue); 362 dccp_qpolicy_drop(sk, skb);
363 kfree_skb(skb);
364 dccp_pr_debug("packet discarded due to err=%d\n", rc); 363 dccp_pr_debug("packet discarded due to err=%d\n", rc);
365 } 364 }
366 } 365 }
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index ef343d53fcea..d6a224982bb5 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -185,6 +185,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
185 dp->dccps_role = DCCP_ROLE_UNDEFINED; 185 dp->dccps_role = DCCP_ROLE_UNDEFINED;
186 dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT; 186 dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT;
187 dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1; 187 dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1;
188 dp->dccps_tx_qlen = sysctl_dccp_tx_qlen;
188 189
189 dccp_init_xmit_timers(sk); 190 dccp_init_xmit_timers(sk);
190 191
@@ -532,6 +533,20 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
532 case DCCP_SOCKOPT_RECV_CSCOV: 533 case DCCP_SOCKOPT_RECV_CSCOV:
533 err = dccp_setsockopt_cscov(sk, val, true); 534 err = dccp_setsockopt_cscov(sk, val, true);
534 break; 535 break;
536 case DCCP_SOCKOPT_QPOLICY_ID:
537 if (sk->sk_state != DCCP_CLOSED)
538 err = -EISCONN;
539 else if (val < 0 || val >= DCCPQ_POLICY_MAX)
540 err = -EINVAL;
541 else
542 dp->dccps_qpolicy = val;
543 break;
544 case DCCP_SOCKOPT_QPOLICY_TXQLEN:
545 if (val < 0)
546 err = -EINVAL;
547 else
548 dp->dccps_tx_qlen = val;
549 break;
535 default: 550 default:
536 err = -ENOPROTOOPT; 551 err = -ENOPROTOOPT;
537 break; 552 break;
@@ -639,6 +654,12 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
639 case DCCP_SOCKOPT_RECV_CSCOV: 654 case DCCP_SOCKOPT_RECV_CSCOV:
640 val = dp->dccps_pcrlen; 655 val = dp->dccps_pcrlen;
641 break; 656 break;
657 case DCCP_SOCKOPT_QPOLICY_ID:
658 val = dp->dccps_qpolicy;
659 break;
660 case DCCP_SOCKOPT_QPOLICY_TXQLEN:
661 val = dp->dccps_tx_qlen;
662 break;
642 case 128 ... 191: 663 case 128 ... 191:
643 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname, 664 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
644 len, (u32 __user *)optval, optlen); 665 len, (u32 __user *)optval, optlen);
@@ -681,6 +702,43 @@ int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
681EXPORT_SYMBOL_GPL(compat_dccp_getsockopt); 702EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
682#endif 703#endif
683 704
705static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
706{
707 struct cmsghdr *cmsg = CMSG_FIRSTHDR(msg);
708
709 /*
710 * Assign an (opaque) qpolicy priority value to skb->priority.
711 *
712 * We are overloading this skb field for use with the qpolicy subystem.
713 * The skb->priority is normally used for the SO_PRIORITY option, which
714 * is initialised from sk_priority. Since the assignment of sk_priority
715 * to skb->priority happens later (on layer 3), we overload this field
716 * for use with queueing priorities as long as the skb is on layer 4.
717 * The default priority value (if nothing is set) is 0.
718 */
719 skb->priority = 0;
720
721 for (; cmsg != NULL; cmsg = CMSG_NXTHDR(msg, cmsg)) {
722
723 if (!CMSG_OK(msg, cmsg))
724 return -EINVAL;
725
726 if (cmsg->cmsg_level != SOL_DCCP)
727 continue;
728
729 switch (cmsg->cmsg_type) {
730 case DCCP_SCM_PRIORITY:
731 if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
732 return -EINVAL;
733 skb->priority = *(__u32 *)CMSG_DATA(cmsg);
734 break;
735 default:
736 return -EINVAL;
737 }
738 }
739 return 0;
740}
741
684int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, 742int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
685 size_t len) 743 size_t len)
686{ 744{
@@ -696,8 +754,7 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
696 754
697 lock_sock(sk); 755 lock_sock(sk);
698 756
699 if (sysctl_dccp_tx_qlen && 757 if (dccp_qpolicy_full(sk)) {
700 (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
701 rc = -EAGAIN; 758 rc = -EAGAIN;
702 goto out_release; 759 goto out_release;
703 } 760 }
@@ -725,7 +782,11 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
725 if (rc != 0) 782 if (rc != 0)
726 goto out_discard; 783 goto out_discard;
727 784
728 skb_queue_tail(&sk->sk_write_queue, skb); 785 rc = dccp_msghdr_parse(msg, skb);
786 if (rc != 0)
787 goto out_discard;
788
789 dccp_qpolicy_push(sk, skb);
729 /* 790 /*
730 * The xmit_timer is set if the TX CCID is rate-based and will expire 791 * The xmit_timer is set if the TX CCID is rate-based and will expire
731 * when congestion control permits to release further packets into the 792 * when congestion control permits to release further packets into the
diff --git a/net/dccp/qpolicy.c b/net/dccp/qpolicy.c
new file mode 100644
index 000000000000..4b0fd6b11f6d
--- /dev/null
+++ b/net/dccp/qpolicy.c
@@ -0,0 +1,126 @@
1/*
2 * net/dccp/qpolicy.c
3 *
4 * Policy-based packet dequeueing interface for DCCP.
5 *
6 * Copyright (c) 2008 Tomasz Grobelny <tomasz@grobelny.oswiecenia.net>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License v2
10 * as published by the Free Software Foundation.
11 */
12#include "dccp.h"
13
14/*
15 * Simple Dequeueing Policy:
16 * If tx_qlen is different from 0, enqueue up to tx_qlen elements.
17 */
18static void qpolicy_simple_push(struct sock *sk, struct sk_buff *skb)
19{
20 skb_queue_tail(&sk->sk_write_queue, skb);
21}
22
23static bool qpolicy_simple_full(struct sock *sk)
24{
25 return dccp_sk(sk)->dccps_tx_qlen &&
26 sk->sk_write_queue.qlen >= dccp_sk(sk)->dccps_tx_qlen;
27}
28
29static struct sk_buff *qpolicy_simple_top(struct sock *sk)
30{
31 return skb_peek(&sk->sk_write_queue);
32}
33
34/*
35 * Priority-based Dequeueing Policy:
36 * If tx_qlen is different from 0 and the queue has reached its upper bound
37 * of tx_qlen elements, replace older packets lowest-priority-first.
38 */
39static struct sk_buff *qpolicy_prio_best_skb(struct sock *sk)
40{
41 struct sk_buff *skb, *best = NULL;
42
43 skb_queue_walk(&sk->sk_write_queue, skb)
44 if (best == NULL || skb->priority > best->priority)
45 best = skb;
46 return best;
47}
48
49static struct sk_buff *qpolicy_prio_worst_skb(struct sock *sk)
50{
51 struct sk_buff *skb, *worst = NULL;
52
53 skb_queue_walk(&sk->sk_write_queue, skb)
54 if (worst == NULL || skb->priority < worst->priority)
55 worst = skb;
56 return worst;
57}
58
59static bool qpolicy_prio_full(struct sock *sk)
60{
61 if (qpolicy_simple_full(sk))
62 dccp_qpolicy_drop(sk, qpolicy_prio_worst_skb(sk));
63 return false;
64}
65
66/**
67 * struct dccp_qpolicy_operations - TX Packet Dequeueing Interface
68 * @push: add a new @skb to the write queue
69 * @full: indicates that no more packets will be admitted
70 * @top: peeks at whatever the queueing policy defines as its `top'
71 */
72static struct dccp_qpolicy_operations {
73 void (*push) (struct sock *sk, struct sk_buff *skb);
74 bool (*full) (struct sock *sk);
75 struct sk_buff* (*top) (struct sock *sk);
76
77} qpol_table[DCCPQ_POLICY_MAX] = {
78 [DCCPQ_POLICY_SIMPLE] = {
79 .push = qpolicy_simple_push,
80 .full = qpolicy_simple_full,
81 .top = qpolicy_simple_top,
82 },
83 [DCCPQ_POLICY_PRIO] = {
84 .push = qpolicy_simple_push,
85 .full = qpolicy_prio_full,
86 .top = qpolicy_prio_best_skb,
87 },
88};
89
90/*
91 * Externally visible interface
92 */
93void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb)
94{
95 qpol_table[dccp_sk(sk)->dccps_qpolicy].push(sk, skb);
96}
97
98bool dccp_qpolicy_full(struct sock *sk)
99{
100 return qpol_table[dccp_sk(sk)->dccps_qpolicy].full(sk);
101}
102
103void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb)
104{
105 if (skb != NULL) {
106 skb_unlink(skb, &sk->sk_write_queue);
107 kfree_skb(skb);
108 }
109}
110
111struct sk_buff *dccp_qpolicy_top(struct sock *sk)
112{
113 return qpol_table[dccp_sk(sk)->dccps_qpolicy].top(sk);
114}
115
116struct sk_buff *dccp_qpolicy_pop(struct sock *sk)
117{
118 struct sk_buff *skb = dccp_qpolicy_top(sk);
119
120 if (skb != NULL) {
121 /* Clear any skb fields that we used internally */
122 skb->priority = 0;
123 skb_unlink(skb, &sk->sk_write_queue);
124 }
125 return skb;
126}