aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorTomasz Grobelny <tomasz@grobelny.oswiecenia.net>2008-09-04 01:30:19 -0400
committerGerrit Renker <gerrit@erg.abdn.ac.uk>2008-09-04 01:45:39 -0400
commitd6da3511d6b558d0b017777b61dc08b8fbc06ea4 (patch)
tree473f9131b9e641d803bfbea174cf1dfc45aea3ca /net
parentddab05568eaa70fc92b2aae957136f188f724e9c (diff)
dccp: Policy-based packet dequeueing infrastructure
This patch adds a generic infrastructure for policy-based dequeueing of TX packets and provides two policies: * a simple FIFO policy (which is the default) and * a priority-based policy (set via socket options). Both policies honour the tx_qlen sysctl for the maximum size of the write queue (can be overridden via socket options). The priority policy uses skb->priority internally to assign a u32 priority identifier, using the same ranking as SO_PRIORITY. The skb->priority field is set to 0 when the packet leaves DCCP. The priority is supplied as ancillary data using cmsg(3); the patch also provides the requisite parsing routines. Signed-off-by: Tomasz Grobelny <tomasz@grobelny.oswiecenia.net> Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Diffstat (limited to 'net')
-rw-r--r--net/dccp/Makefile2
-rw-r--r--net/dccp/dccp.h12
-rw-r--r--net/dccp/output.c7
-rw-r--r--net/dccp/proto.c67
-rw-r--r--net/dccp/qpolicy.c126
5 files changed, 206 insertions, 8 deletions
diff --git a/net/dccp/Makefile b/net/dccp/Makefile
index b68440bd7fa2..0c1c9af2bf7e 100644
--- a/net/dccp/Makefile
+++ b/net/dccp/Makefile
@@ -1,7 +1,7 @@
1obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o 1obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o
2 2
3dccp-y := ccid.o feat.o input.o minisocks.o options.o \ 3dccp-y := ccid.o feat.o input.o minisocks.o options.o \
4 output.o proto.o timer.o ackvec.o 4 qpolicy.o output.o proto.o timer.o ackvec.o
5 5
6dccp_ipv4-y := ipv4.o 6dccp_ipv4-y := ipv4.o
7 7
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 74c90cd27677..ce2dd6f6f34d 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -234,6 +234,18 @@ extern void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
234extern void dccp_send_sync(struct sock *sk, const u64 seq, 234extern void dccp_send_sync(struct sock *sk, const u64 seq,
235 const enum dccp_pkt_type pkt_type); 235 const enum dccp_pkt_type pkt_type);
236 236
237/*
238 * TX Packet Dequeueing Interface
239 */
240extern void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb);
241extern bool dccp_qpolicy_full(struct sock *sk);
242extern void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb);
243extern struct sk_buff *dccp_qpolicy_top(struct sock *sk);
244extern struct sk_buff *dccp_qpolicy_pop(struct sock *sk);
245
246/*
247 * TX Packet Output and TX Timers
248 */
237extern void dccp_write_xmit(struct sock *sk); 249extern void dccp_write_xmit(struct sock *sk);
238extern void dccp_write_space(struct sock *sk); 250extern void dccp_write_space(struct sock *sk);
239extern void dccp_flush_write_queue(struct sock *sk, long *time_budget); 251extern void dccp_flush_write_queue(struct sock *sk, long *time_budget);
diff --git a/net/dccp/output.c b/net/dccp/output.c
index b1eaf7bcfb11..2532797a8009 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -241,7 +241,7 @@ static void dccp_xmit_packet(struct sock *sk)
241{ 241{
242 int err, len; 242 int err, len;
243 struct dccp_sock *dp = dccp_sk(sk); 243 struct dccp_sock *dp = dccp_sk(sk);
244 struct sk_buff *skb = skb_dequeue(&sk->sk_write_queue); 244 struct sk_buff *skb = dccp_qpolicy_pop(sk);
245 245
246 if (unlikely(skb == NULL)) 246 if (unlikely(skb == NULL))
247 return; 247 return;
@@ -344,7 +344,7 @@ void dccp_write_xmit(struct sock *sk)
344 struct dccp_sock *dp = dccp_sk(sk); 344 struct dccp_sock *dp = dccp_sk(sk);
345 struct sk_buff *skb; 345 struct sk_buff *skb;
346 346
347 while ((skb = skb_peek(&sk->sk_write_queue))) { 347 while ((skb = dccp_qpolicy_top(sk))) {
348 int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); 348 int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
349 349
350 switch (ccid_packet_dequeue_eval(rc)) { 350 switch (ccid_packet_dequeue_eval(rc)) {
@@ -358,8 +358,7 @@ void dccp_write_xmit(struct sock *sk)
358 dccp_xmit_packet(sk); 358 dccp_xmit_packet(sk);
359 break; 359 break;
360 case CCID_PACKET_ERR: 360 case CCID_PACKET_ERR:
361 skb_dequeue(&sk->sk_write_queue); 361 dccp_qpolicy_drop(sk, skb);
362 kfree_skb(skb);
363 dccp_pr_debug("packet discarded due to err=%d\n", rc); 362 dccp_pr_debug("packet discarded due to err=%d\n", rc);
364 } 363 }
365 } 364 }
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 8c125ffab1c5..b56efdd2a421 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -189,6 +189,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
189 dp->dccps_rate_last = jiffies; 189 dp->dccps_rate_last = jiffies;
190 dp->dccps_role = DCCP_ROLE_UNDEFINED; 190 dp->dccps_role = DCCP_ROLE_UNDEFINED;
191 dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT; 191 dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT;
192 dp->dccps_tx_qlen = sysctl_dccp_tx_qlen;
192 193
193 dccp_init_xmit_timers(sk); 194 dccp_init_xmit_timers(sk);
194 195
@@ -541,6 +542,20 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
541 case DCCP_SOCKOPT_RECV_CSCOV: 542 case DCCP_SOCKOPT_RECV_CSCOV:
542 err = dccp_setsockopt_cscov(sk, val, true); 543 err = dccp_setsockopt_cscov(sk, val, true);
543 break; 544 break;
545 case DCCP_SOCKOPT_QPOLICY_ID:
546 if (sk->sk_state != DCCP_CLOSED)
547 err = -EISCONN;
548 else if (val < 0 || val >= DCCPQ_POLICY_MAX)
549 err = -EINVAL;
550 else
551 dp->dccps_qpolicy = val;
552 break;
553 case DCCP_SOCKOPT_QPOLICY_TXQLEN:
554 if (val < 0)
555 err = -EINVAL;
556 else
557 dp->dccps_tx_qlen = val;
558 break;
544 default: 559 default:
545 err = -ENOPROTOOPT; 560 err = -ENOPROTOOPT;
546 break; 561 break;
@@ -648,6 +663,12 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
648 case DCCP_SOCKOPT_RECV_CSCOV: 663 case DCCP_SOCKOPT_RECV_CSCOV:
649 val = dp->dccps_pcrlen; 664 val = dp->dccps_pcrlen;
650 break; 665 break;
666 case DCCP_SOCKOPT_QPOLICY_ID:
667 val = dp->dccps_qpolicy;
668 break;
669 case DCCP_SOCKOPT_QPOLICY_TXQLEN:
670 val = dp->dccps_tx_qlen;
671 break;
651 case 128 ... 191: 672 case 128 ... 191:
652 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname, 673 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
653 len, (u32 __user *)optval, optlen); 674 len, (u32 __user *)optval, optlen);
@@ -690,6 +711,43 @@ int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
690EXPORT_SYMBOL_GPL(compat_dccp_getsockopt); 711EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
691#endif 712#endif
692 713
714static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
715{
716 struct cmsghdr *cmsg = CMSG_FIRSTHDR(msg);
717
718 /*
719 * Assign an (opaque) qpolicy priority value to skb->priority.
720 *
721 * We are overloading this skb field for use with the qpolicy subystem.
722 * The skb->priority is normally used for the SO_PRIORITY option, which
723 * is initialised from sk_priority. Since the assignment of sk_priority
724 * to skb->priority happens later (on layer 3), we overload this field
725 * for use with queueing priorities as long as the skb is on layer 4.
726 * The default priority value (if nothing is set) is 0.
727 */
728 skb->priority = 0;
729
730 for (; cmsg != NULL; cmsg = CMSG_NXTHDR(msg, cmsg)) {
731
732 if (!CMSG_OK(msg, cmsg))
733 return -EINVAL;
734
735 if (cmsg->cmsg_level != SOL_DCCP)
736 continue;
737
738 switch (cmsg->cmsg_type) {
739 case DCCP_SCM_PRIORITY:
740 if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
741 return -EINVAL;
742 skb->priority = *(__u32 *)CMSG_DATA(cmsg);
743 break;
744 default:
745 return -EINVAL;
746 }
747 }
748 return 0;
749}
750
693int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, 751int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
694 size_t len) 752 size_t len)
695{ 753{
@@ -705,8 +763,7 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
705 763
706 lock_sock(sk); 764 lock_sock(sk);
707 765
708 if (sysctl_dccp_tx_qlen && 766 if (dccp_qpolicy_full(sk)) {
709 (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
710 rc = -EAGAIN; 767 rc = -EAGAIN;
711 goto out_release; 768 goto out_release;
712 } 769 }
@@ -734,7 +791,11 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
734 if (rc != 0) 791 if (rc != 0)
735 goto out_discard; 792 goto out_discard;
736 793
737 skb_queue_tail(&sk->sk_write_queue, skb); 794 rc = dccp_msghdr_parse(msg, skb);
795 if (rc != 0)
796 goto out_discard;
797
798 dccp_qpolicy_push(sk, skb);
738 dccp_write_xmit(sk); 799 dccp_write_xmit(sk);
739out_release: 800out_release:
740 release_sock(sk); 801 release_sock(sk);
diff --git a/net/dccp/qpolicy.c b/net/dccp/qpolicy.c
new file mode 100644
index 000000000000..414696b0d830
--- /dev/null
+++ b/net/dccp/qpolicy.c
@@ -0,0 +1,126 @@
1/*
2 * net/dccp/qpolicy.c
3 *
4 * Policy-based packet dequeueing interface for DCCP.
5 *
6 * Copyright (c) 2008 Tomasz Grobelny <tomasz@grobelny.oswiecenia.net>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License v2
10 * as published by the Free Software Foundation.
11 */
12#include "dccp.h"
13
14/*
15 * Simple Dequeueing Policy:
16 * If tx_qlen is different from 0, enqueue up to tx_qlen elements.
17 */
18static void qpolicy_simple_push(struct sock *sk, struct sk_buff *skb)
19{
20 skb_queue_tail(&sk->sk_write_queue, skb);
21}
22
23static bool qpolicy_simple_full(struct sock *sk)
24{
25 return dccp_sk(sk)->dccps_tx_qlen &&
26 sk->sk_write_queue.qlen >= dccp_sk(sk)->dccps_tx_qlen;
27}
28
29static struct sk_buff *qpolicy_simple_top(struct sock *sk)
30{
31 return skb_peek(&sk->sk_write_queue);
32}
33
34/*
35 * Priority-based Dequeueing Policy:
36 * If tx_qlen is different from 0 and the queue has reached its upper bound
37 * of tx_qlen elements, replace older packets lowest-priority-first.
38 */
39static struct sk_buff *qpolicy_prio_best_skb(struct sock *sk)
40{
41 struct sk_buff *skb, *best = NULL;
42
43 skb_queue_walk(&sk->sk_write_queue, skb)
44 if (best == NULL || skb->priority > best->priority)
45 best = skb;
46 return best;
47}
48
49static struct sk_buff *qpolicy_prio_worst_skb(struct sock *sk)
50{
51 struct sk_buff *skb, *worst = NULL;
52
53 skb_queue_walk(&sk->sk_write_queue, skb)
54 if (worst == NULL || skb->priority < worst->priority)
55 worst = skb;
56 return worst;
57}
58
59static bool qpolicy_prio_full(struct sock *sk)
60{
61 if (qpolicy_simple_full(sk))
62 dccp_qpolicy_drop(sk, qpolicy_prio_worst_skb(sk));
63 return false;
64}
65
66/**
67 * struct dccp_qpolicy_operations - TX Packet Dequeueing Interface
68 * @push: add a new @skb to the write queue
69 * @full: indicates that no more packets will be admitted
70 * @top: peeks at whatever the queueing policy defines as its `top'
71 */
72static struct dccp_qpolicy_operations {
73 void (*push) (struct sock *sk, struct sk_buff *skb);
74 bool (*full) (struct sock *sk);
75 struct sk_buff* (*top) (struct sock *sk);
76
77} qpol_table[DCCPQ_POLICY_MAX] = {
78 [DCCPQ_POLICY_SIMPLE] = {
79 .push = qpolicy_simple_push,
80 .full = qpolicy_simple_full,
81 .top = qpolicy_simple_top,
82 },
83 [DCCPQ_POLICY_PRIO] = {
84 .push = qpolicy_simple_push,
85 .full = qpolicy_prio_full,
86 .top = qpolicy_prio_best_skb,
87 },
88};
89
90/*
91 * Externally visible interface
92 */
93void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb)
94{
95 qpol_table[dccp_sk(sk)->dccps_qpolicy].push(sk, skb);
96}
97
98bool dccp_qpolicy_full(struct sock *sk)
99{
100 return qpol_table[dccp_sk(sk)->dccps_qpolicy].full(sk);
101}
102
103void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb)
104{
105 if (skb != NULL) {
106 skb_unlink(skb, &sk->sk_write_queue);
107 kfree_skb(skb);
108 }
109}
110
111struct sk_buff *dccp_qpolicy_top(struct sock *sk)
112{
113 return qpol_table[dccp_sk(sk)->dccps_qpolicy].top(sk);
114}
115
116struct sk_buff *dccp_qpolicy_pop(struct sock *sk)
117{
118 struct sk_buff *skb = dccp_qpolicy_top(sk);
119
120 /* Clear any skb fields that we used internally */
121 skb->priority = 0;
122
123 if (skb)
124 skb_unlink(skb, &sk->sk_write_queue);
125 return skb;
126}