diff options
author | Tomasz Grobelny <tomasz@grobelny.oswiecenia.net> | 2008-09-04 01:30:19 -0400 |
---|---|---|
committer | Gerrit Renker <gerrit@erg.abdn.ac.uk> | 2008-09-04 01:45:39 -0400 |
commit | d6da3511d6b558d0b017777b61dc08b8fbc06ea4 (patch) | |
tree | 473f9131b9e641d803bfbea174cf1dfc45aea3ca /net | |
parent | ddab05568eaa70fc92b2aae957136f188f724e9c (diff) |
dccp: Policy-based packet dequeueing infrastructure
This patch adds a generic infrastructure for policy-based dequeueing of
TX packets and provides two policies:
* a simple FIFO policy (which is the default) and
* a priority based policy (set via socket options).
Both policies honour the tx_qlen sysctl for the maximum size of the write
queue (can be overridden via socket options).
The priority policy uses skb->priority internally to assign a u32 priority
identifier, using the same ranking as SO_PRIORITY. The skb->priority field
is set to 0 when the packet leaves DCCP. The priority is supplied as ancillary
data using cmsg(3); the patch also provides the requisite parsing routines.
Signed-off-by: Tomasz Grobelny <tomasz@grobelny.oswiecenia.net>
Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Diffstat (limited to 'net')
-rw-r--r-- | net/dccp/Makefile | 2 | ||||
-rw-r--r-- | net/dccp/dccp.h | 12 | ||||
-rw-r--r-- | net/dccp/output.c | 7 | ||||
-rw-r--r-- | net/dccp/proto.c | 67 | ||||
-rw-r--r-- | net/dccp/qpolicy.c | 126 |
5 files changed, 206 insertions, 8 deletions
diff --git a/net/dccp/Makefile b/net/dccp/Makefile index b68440bd7fa2..0c1c9af2bf7e 100644 --- a/net/dccp/Makefile +++ b/net/dccp/Makefile | |||
@@ -1,7 +1,7 @@ | |||
1 | obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o | 1 | obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o |
2 | 2 | ||
3 | dccp-y := ccid.o feat.o input.o minisocks.o options.o \ | 3 | dccp-y := ccid.o feat.o input.o minisocks.o options.o \ |
4 | output.o proto.o timer.o ackvec.o | 4 | qpolicy.o output.o proto.o timer.o ackvec.o |
5 | 5 | ||
6 | dccp_ipv4-y := ipv4.o | 6 | dccp_ipv4-y := ipv4.o |
7 | 7 | ||
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 74c90cd27677..ce2dd6f6f34d 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h | |||
@@ -234,6 +234,18 @@ extern void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, | |||
234 | extern void dccp_send_sync(struct sock *sk, const u64 seq, | 234 | extern void dccp_send_sync(struct sock *sk, const u64 seq, |
235 | const enum dccp_pkt_type pkt_type); | 235 | const enum dccp_pkt_type pkt_type); |
236 | 236 | ||
237 | /* | ||
238 | * TX Packet Dequeueing Interface | ||
239 | */ | ||
240 | extern void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb); | ||
241 | extern bool dccp_qpolicy_full(struct sock *sk); | ||
242 | extern void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb); | ||
243 | extern struct sk_buff *dccp_qpolicy_top(struct sock *sk); | ||
244 | extern struct sk_buff *dccp_qpolicy_pop(struct sock *sk); | ||
245 | |||
246 | /* | ||
247 | * TX Packet Output and TX Timers | ||
248 | */ | ||
237 | extern void dccp_write_xmit(struct sock *sk); | 249 | extern void dccp_write_xmit(struct sock *sk); |
238 | extern void dccp_write_space(struct sock *sk); | 250 | extern void dccp_write_space(struct sock *sk); |
239 | extern void dccp_flush_write_queue(struct sock *sk, long *time_budget); | 251 | extern void dccp_flush_write_queue(struct sock *sk, long *time_budget); |
diff --git a/net/dccp/output.c b/net/dccp/output.c index b1eaf7bcfb11..2532797a8009 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c | |||
@@ -241,7 +241,7 @@ static void dccp_xmit_packet(struct sock *sk) | |||
241 | { | 241 | { |
242 | int err, len; | 242 | int err, len; |
243 | struct dccp_sock *dp = dccp_sk(sk); | 243 | struct dccp_sock *dp = dccp_sk(sk); |
244 | struct sk_buff *skb = skb_dequeue(&sk->sk_write_queue); | 244 | struct sk_buff *skb = dccp_qpolicy_pop(sk); |
245 | 245 | ||
246 | if (unlikely(skb == NULL)) | 246 | if (unlikely(skb == NULL)) |
247 | return; | 247 | return; |
@@ -344,7 +344,7 @@ void dccp_write_xmit(struct sock *sk) | |||
344 | struct dccp_sock *dp = dccp_sk(sk); | 344 | struct dccp_sock *dp = dccp_sk(sk); |
345 | struct sk_buff *skb; | 345 | struct sk_buff *skb; |
346 | 346 | ||
347 | while ((skb = skb_peek(&sk->sk_write_queue))) { | 347 | while ((skb = dccp_qpolicy_top(sk))) { |
348 | int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); | 348 | int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); |
349 | 349 | ||
350 | switch (ccid_packet_dequeue_eval(rc)) { | 350 | switch (ccid_packet_dequeue_eval(rc)) { |
@@ -358,8 +358,7 @@ void dccp_write_xmit(struct sock *sk) | |||
358 | dccp_xmit_packet(sk); | 358 | dccp_xmit_packet(sk); |
359 | break; | 359 | break; |
360 | case CCID_PACKET_ERR: | 360 | case CCID_PACKET_ERR: |
361 | skb_dequeue(&sk->sk_write_queue); | 361 | dccp_qpolicy_drop(sk, skb); |
362 | kfree_skb(skb); | ||
363 | dccp_pr_debug("packet discarded due to err=%d\n", rc); | 362 | dccp_pr_debug("packet discarded due to err=%d\n", rc); |
364 | } | 363 | } |
365 | } | 364 | } |
diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 8c125ffab1c5..b56efdd2a421 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c | |||
@@ -189,6 +189,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) | |||
189 | dp->dccps_rate_last = jiffies; | 189 | dp->dccps_rate_last = jiffies; |
190 | dp->dccps_role = DCCP_ROLE_UNDEFINED; | 190 | dp->dccps_role = DCCP_ROLE_UNDEFINED; |
191 | dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT; | 191 | dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT; |
192 | dp->dccps_tx_qlen = sysctl_dccp_tx_qlen; | ||
192 | 193 | ||
193 | dccp_init_xmit_timers(sk); | 194 | dccp_init_xmit_timers(sk); |
194 | 195 | ||
@@ -541,6 +542,20 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname, | |||
541 | case DCCP_SOCKOPT_RECV_CSCOV: | 542 | case DCCP_SOCKOPT_RECV_CSCOV: |
542 | err = dccp_setsockopt_cscov(sk, val, true); | 543 | err = dccp_setsockopt_cscov(sk, val, true); |
543 | break; | 544 | break; |
545 | case DCCP_SOCKOPT_QPOLICY_ID: | ||
546 | if (sk->sk_state != DCCP_CLOSED) | ||
547 | err = -EISCONN; | ||
548 | else if (val < 0 || val >= DCCPQ_POLICY_MAX) | ||
549 | err = -EINVAL; | ||
550 | else | ||
551 | dp->dccps_qpolicy = val; | ||
552 | break; | ||
553 | case DCCP_SOCKOPT_QPOLICY_TXQLEN: | ||
554 | if (val < 0) | ||
555 | err = -EINVAL; | ||
556 | else | ||
557 | dp->dccps_tx_qlen = val; | ||
558 | break; | ||
544 | default: | 559 | default: |
545 | err = -ENOPROTOOPT; | 560 | err = -ENOPROTOOPT; |
546 | break; | 561 | break; |
@@ -648,6 +663,12 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname, | |||
648 | case DCCP_SOCKOPT_RECV_CSCOV: | 663 | case DCCP_SOCKOPT_RECV_CSCOV: |
649 | val = dp->dccps_pcrlen; | 664 | val = dp->dccps_pcrlen; |
650 | break; | 665 | break; |
666 | case DCCP_SOCKOPT_QPOLICY_ID: | ||
667 | val = dp->dccps_qpolicy; | ||
668 | break; | ||
669 | case DCCP_SOCKOPT_QPOLICY_TXQLEN: | ||
670 | val = dp->dccps_tx_qlen; | ||
671 | break; | ||
651 | case 128 ... 191: | 672 | case 128 ... 191: |
652 | return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname, | 673 | return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname, |
653 | len, (u32 __user *)optval, optlen); | 674 | len, (u32 __user *)optval, optlen); |
@@ -690,6 +711,43 @@ int compat_dccp_getsockopt(struct sock *sk, int level, int optname, | |||
690 | EXPORT_SYMBOL_GPL(compat_dccp_getsockopt); | 711 | EXPORT_SYMBOL_GPL(compat_dccp_getsockopt); |
691 | #endif | 712 | #endif |
692 | 713 | ||
714 | static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb) | ||
715 | { | ||
716 | struct cmsghdr *cmsg = CMSG_FIRSTHDR(msg); | ||
717 | |||
718 | /* | ||
719 | * Assign an (opaque) qpolicy priority value to skb->priority. | ||
720 | * | ||
721 | * We are overloading this skb field for use with the qpolicy subystem. | ||
722 | * The skb->priority is normally used for the SO_PRIORITY option, which | ||
723 | * is initialised from sk_priority. Since the assignment of sk_priority | ||
724 | * to skb->priority happens later (on layer 3), we overload this field | ||
725 | * for use with queueing priorities as long as the skb is on layer 4. | ||
726 | * The default priority value (if nothing is set) is 0. | ||
727 | */ | ||
728 | skb->priority = 0; | ||
729 | |||
730 | for (; cmsg != NULL; cmsg = CMSG_NXTHDR(msg, cmsg)) { | ||
731 | |||
732 | if (!CMSG_OK(msg, cmsg)) | ||
733 | return -EINVAL; | ||
734 | |||
735 | if (cmsg->cmsg_level != SOL_DCCP) | ||
736 | continue; | ||
737 | |||
738 | switch (cmsg->cmsg_type) { | ||
739 | case DCCP_SCM_PRIORITY: | ||
740 | if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32))) | ||
741 | return -EINVAL; | ||
742 | skb->priority = *(__u32 *)CMSG_DATA(cmsg); | ||
743 | break; | ||
744 | default: | ||
745 | return -EINVAL; | ||
746 | } | ||
747 | } | ||
748 | return 0; | ||
749 | } | ||
750 | |||
693 | int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | 751 | int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, |
694 | size_t len) | 752 | size_t len) |
695 | { | 753 | { |
@@ -705,8 +763,7 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
705 | 763 | ||
706 | lock_sock(sk); | 764 | lock_sock(sk); |
707 | 765 | ||
708 | if (sysctl_dccp_tx_qlen && | 766 | if (dccp_qpolicy_full(sk)) { |
709 | (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) { | ||
710 | rc = -EAGAIN; | 767 | rc = -EAGAIN; |
711 | goto out_release; | 768 | goto out_release; |
712 | } | 769 | } |
@@ -734,7 +791,11 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
734 | if (rc != 0) | 791 | if (rc != 0) |
735 | goto out_discard; | 792 | goto out_discard; |
736 | 793 | ||
737 | skb_queue_tail(&sk->sk_write_queue, skb); | 794 | rc = dccp_msghdr_parse(msg, skb); |
795 | if (rc != 0) | ||
796 | goto out_discard; | ||
797 | |||
798 | dccp_qpolicy_push(sk, skb); | ||
738 | dccp_write_xmit(sk); | 799 | dccp_write_xmit(sk); |
739 | out_release: | 800 | out_release: |
740 | release_sock(sk); | 801 | release_sock(sk); |
diff --git a/net/dccp/qpolicy.c b/net/dccp/qpolicy.c new file mode 100644 index 000000000000..414696b0d830 --- /dev/null +++ b/net/dccp/qpolicy.c | |||
@@ -0,0 +1,126 @@ | |||
1 | /* | ||
2 | * net/dccp/qpolicy.c | ||
3 | * | ||
4 | * Policy-based packet dequeueing interface for DCCP. | ||
5 | * | ||
6 | * Copyright (c) 2008 Tomasz Grobelny <tomasz@grobelny.oswiecenia.net> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License v2 | ||
10 | * as published by the Free Software Foundation. | ||
11 | */ | ||
12 | #include "dccp.h" | ||
13 | |||
14 | /* | ||
15 | * Simple Dequeueing Policy: | ||
16 | * If tx_qlen is different from 0, enqueue up to tx_qlen elements. | ||
17 | */ | ||
18 | static void qpolicy_simple_push(struct sock *sk, struct sk_buff *skb) | ||
19 | { | ||
20 | skb_queue_tail(&sk->sk_write_queue, skb); | ||
21 | } | ||
22 | |||
23 | static bool qpolicy_simple_full(struct sock *sk) | ||
24 | { | ||
25 | return dccp_sk(sk)->dccps_tx_qlen && | ||
26 | sk->sk_write_queue.qlen >= dccp_sk(sk)->dccps_tx_qlen; | ||
27 | } | ||
28 | |||
29 | static struct sk_buff *qpolicy_simple_top(struct sock *sk) | ||
30 | { | ||
31 | return skb_peek(&sk->sk_write_queue); | ||
32 | } | ||
33 | |||
34 | /* | ||
35 | * Priority-based Dequeueing Policy: | ||
36 | * If tx_qlen is different from 0 and the queue has reached its upper bound | ||
37 | * of tx_qlen elements, replace older packets lowest-priority-first. | ||
38 | */ | ||
39 | static struct sk_buff *qpolicy_prio_best_skb(struct sock *sk) | ||
40 | { | ||
41 | struct sk_buff *skb, *best = NULL; | ||
42 | |||
43 | skb_queue_walk(&sk->sk_write_queue, skb) | ||
44 | if (best == NULL || skb->priority > best->priority) | ||
45 | best = skb; | ||
46 | return best; | ||
47 | } | ||
48 | |||
49 | static struct sk_buff *qpolicy_prio_worst_skb(struct sock *sk) | ||
50 | { | ||
51 | struct sk_buff *skb, *worst = NULL; | ||
52 | |||
53 | skb_queue_walk(&sk->sk_write_queue, skb) | ||
54 | if (worst == NULL || skb->priority < worst->priority) | ||
55 | worst = skb; | ||
56 | return worst; | ||
57 | } | ||
58 | |||
59 | static bool qpolicy_prio_full(struct sock *sk) | ||
60 | { | ||
61 | if (qpolicy_simple_full(sk)) | ||
62 | dccp_qpolicy_drop(sk, qpolicy_prio_worst_skb(sk)); | ||
63 | return false; | ||
64 | } | ||
65 | |||
66 | /** | ||
67 | * struct dccp_qpolicy_operations - TX Packet Dequeueing Interface | ||
68 | * @push: add a new @skb to the write queue | ||
69 | * @full: indicates that no more packets will be admitted | ||
70 | * @top: peeks at whatever the queueing policy defines as its `top' | ||
71 | */ | ||
72 | static struct dccp_qpolicy_operations { | ||
73 | void (*push) (struct sock *sk, struct sk_buff *skb); | ||
74 | bool (*full) (struct sock *sk); | ||
75 | struct sk_buff* (*top) (struct sock *sk); | ||
76 | |||
77 | } qpol_table[DCCPQ_POLICY_MAX] = { | ||
78 | [DCCPQ_POLICY_SIMPLE] = { | ||
79 | .push = qpolicy_simple_push, | ||
80 | .full = qpolicy_simple_full, | ||
81 | .top = qpolicy_simple_top, | ||
82 | }, | ||
83 | [DCCPQ_POLICY_PRIO] = { | ||
84 | .push = qpolicy_simple_push, | ||
85 | .full = qpolicy_prio_full, | ||
86 | .top = qpolicy_prio_best_skb, | ||
87 | }, | ||
88 | }; | ||
89 | |||
90 | /* | ||
91 | * Externally visible interface | ||
92 | */ | ||
93 | void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb) | ||
94 | { | ||
95 | qpol_table[dccp_sk(sk)->dccps_qpolicy].push(sk, skb); | ||
96 | } | ||
97 | |||
98 | bool dccp_qpolicy_full(struct sock *sk) | ||
99 | { | ||
100 | return qpol_table[dccp_sk(sk)->dccps_qpolicy].full(sk); | ||
101 | } | ||
102 | |||
103 | void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb) | ||
104 | { | ||
105 | if (skb != NULL) { | ||
106 | skb_unlink(skb, &sk->sk_write_queue); | ||
107 | kfree_skb(skb); | ||
108 | } | ||
109 | } | ||
110 | |||
111 | struct sk_buff *dccp_qpolicy_top(struct sock *sk) | ||
112 | { | ||
113 | return qpol_table[dccp_sk(sk)->dccps_qpolicy].top(sk); | ||
114 | } | ||
115 | |||
116 | struct sk_buff *dccp_qpolicy_pop(struct sock *sk) | ||
117 | { | ||
118 | struct sk_buff *skb = dccp_qpolicy_top(sk); | ||
119 | |||
120 | /* Clear any skb fields that we used internally */ | ||
121 | skb->priority = 0; | ||
122 | |||
123 | if (skb) | ||
124 | skb_unlink(skb, &sk->sk_write_queue); | ||
125 | return skb; | ||
126 | } | ||