diff options
author | Tomasz Grobelny <tomasz@grobelny.oswiecenia.net> | 2010-12-04 07:38:01 -0500 |
---|---|---|
committer | Gerrit Renker <gerrit@erg.abdn.ac.uk> | 2010-12-07 07:47:12 -0500 |
commit | 871a2c16c21b988688b4ab1a78eadd969765c0a3 (patch) | |
tree | 34ffb3be1402747ef3b7fdb754fb99778bd45728 /net/dccp | |
parent | cfa969e385a23e4c85f50e0ed5de25a2e18bf9d4 (diff) |
dccp: Policy-based packet dequeueing infrastructure
This patch adds a generic infrastructure for policy-based dequeueing of
TX packets and provides two policies:
* a simple FIFO policy (which is the default) and
* a priority based policy (set via socket options).
Both policies honour the tx_qlen sysctl for the maximum size of the write
queue (can be overridden via socket options).
The priority policy uses skb->priority internally to assign a u32 priority
identifier, using the same ranking as SO_PRIORITY. The skb->priority field
is set to 0 when the packet leaves DCCP. The priority is supplied as ancillary
data using cmsg(3); the patch also provides the requisite parsing routines.
Signed-off-by: Tomasz Grobelny <tomasz@grobelny.oswiecenia.net>
Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Diffstat (limited to 'net/dccp')
-rw-r--r-- | net/dccp/Makefile | 4 | ||||
-rw-r--r-- | net/dccp/dccp.h | 12 | ||||
-rw-r--r-- | net/dccp/output.c | 7 | ||||
-rw-r--r-- | net/dccp/proto.c | 67 | ||||
-rw-r--r-- | net/dccp/qpolicy.c | 126 |
5 files changed, 207 insertions, 9 deletions
diff --git a/net/dccp/Makefile b/net/dccp/Makefile index 2991efcc8de..5c8362b037e 100644 --- a/net/dccp/Makefile +++ b/net/dccp/Makefile | |||
@@ -1,7 +1,7 @@ | |||
1 | obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o | 1 | obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o |
2 | 2 | ||
3 | dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o | 3 | dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o \ |
4 | 4 | qpolicy.o | |
5 | # | 5 | # |
6 | # CCID algorithms to be used by dccp.ko | 6 | # CCID algorithms to be used by dccp.ko |
7 | # | 7 | # |
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 19fafd59746..d008da91cec 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h | |||
@@ -243,6 +243,18 @@ extern void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, | |||
243 | extern void dccp_send_sync(struct sock *sk, const u64 seq, | 243 | extern void dccp_send_sync(struct sock *sk, const u64 seq, |
244 | const enum dccp_pkt_type pkt_type); | 244 | const enum dccp_pkt_type pkt_type); |
245 | 245 | ||
246 | /* | ||
247 | * TX Packet Dequeueing Interface | ||
248 | */ | ||
249 | extern void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb); | ||
250 | extern bool dccp_qpolicy_full(struct sock *sk); | ||
251 | extern void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb); | ||
252 | extern struct sk_buff *dccp_qpolicy_top(struct sock *sk); | ||
253 | extern struct sk_buff *dccp_qpolicy_pop(struct sock *sk); | ||
254 | |||
255 | /* | ||
256 | * TX Packet Output and TX Timers | ||
257 | */ | ||
246 | extern void dccp_write_xmit(struct sock *sk); | 258 | extern void dccp_write_xmit(struct sock *sk); |
247 | extern void dccp_write_space(struct sock *sk); | 259 | extern void dccp_write_space(struct sock *sk); |
248 | extern void dccp_flush_write_queue(struct sock *sk, long *time_budget); | 260 | extern void dccp_flush_write_queue(struct sock *sk, long *time_budget); |
diff --git a/net/dccp/output.c b/net/dccp/output.c index d96dd9d362a..784d3021054 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c | |||
@@ -242,7 +242,7 @@ static void dccp_xmit_packet(struct sock *sk) | |||
242 | { | 242 | { |
243 | int err, len; | 243 | int err, len; |
244 | struct dccp_sock *dp = dccp_sk(sk); | 244 | struct dccp_sock *dp = dccp_sk(sk); |
245 | struct sk_buff *skb = skb_dequeue(&sk->sk_write_queue); | 245 | struct sk_buff *skb = dccp_qpolicy_pop(sk); |
246 | 246 | ||
247 | if (unlikely(skb == NULL)) | 247 | if (unlikely(skb == NULL)) |
248 | return; | 248 | return; |
@@ -345,7 +345,7 @@ void dccp_write_xmit(struct sock *sk) | |||
345 | struct dccp_sock *dp = dccp_sk(sk); | 345 | struct dccp_sock *dp = dccp_sk(sk); |
346 | struct sk_buff *skb; | 346 | struct sk_buff *skb; |
347 | 347 | ||
348 | while ((skb = skb_peek(&sk->sk_write_queue))) { | 348 | while ((skb = dccp_qpolicy_top(sk))) { |
349 | int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); | 349 | int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); |
350 | 350 | ||
351 | switch (ccid_packet_dequeue_eval(rc)) { | 351 | switch (ccid_packet_dequeue_eval(rc)) { |
@@ -359,8 +359,7 @@ void dccp_write_xmit(struct sock *sk) | |||
359 | dccp_xmit_packet(sk); | 359 | dccp_xmit_packet(sk); |
360 | break; | 360 | break; |
361 | case CCID_PACKET_ERR: | 361 | case CCID_PACKET_ERR: |
362 | skb_dequeue(&sk->sk_write_queue); | 362 | dccp_qpolicy_drop(sk, skb); |
363 | kfree_skb(skb); | ||
364 | dccp_pr_debug("packet discarded due to err=%d\n", rc); | 363 | dccp_pr_debug("packet discarded due to err=%d\n", rc); |
365 | } | 364 | } |
366 | } | 365 | } |
diff --git a/net/dccp/proto.c b/net/dccp/proto.c index ef343d53fce..d6a224982bb 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c | |||
@@ -185,6 +185,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) | |||
185 | dp->dccps_role = DCCP_ROLE_UNDEFINED; | 185 | dp->dccps_role = DCCP_ROLE_UNDEFINED; |
186 | dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT; | 186 | dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT; |
187 | dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1; | 187 | dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1; |
188 | dp->dccps_tx_qlen = sysctl_dccp_tx_qlen; | ||
188 | 189 | ||
189 | dccp_init_xmit_timers(sk); | 190 | dccp_init_xmit_timers(sk); |
190 | 191 | ||
@@ -532,6 +533,20 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname, | |||
532 | case DCCP_SOCKOPT_RECV_CSCOV: | 533 | case DCCP_SOCKOPT_RECV_CSCOV: |
533 | err = dccp_setsockopt_cscov(sk, val, true); | 534 | err = dccp_setsockopt_cscov(sk, val, true); |
534 | break; | 535 | break; |
536 | case DCCP_SOCKOPT_QPOLICY_ID: | ||
537 | if (sk->sk_state != DCCP_CLOSED) | ||
538 | err = -EISCONN; | ||
539 | else if (val < 0 || val >= DCCPQ_POLICY_MAX) | ||
540 | err = -EINVAL; | ||
541 | else | ||
542 | dp->dccps_qpolicy = val; | ||
543 | break; | ||
544 | case DCCP_SOCKOPT_QPOLICY_TXQLEN: | ||
545 | if (val < 0) | ||
546 | err = -EINVAL; | ||
547 | else | ||
548 | dp->dccps_tx_qlen = val; | ||
549 | break; | ||
535 | default: | 550 | default: |
536 | err = -ENOPROTOOPT; | 551 | err = -ENOPROTOOPT; |
537 | break; | 552 | break; |
@@ -639,6 +654,12 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname, | |||
639 | case DCCP_SOCKOPT_RECV_CSCOV: | 654 | case DCCP_SOCKOPT_RECV_CSCOV: |
640 | val = dp->dccps_pcrlen; | 655 | val = dp->dccps_pcrlen; |
641 | break; | 656 | break; |
657 | case DCCP_SOCKOPT_QPOLICY_ID: | ||
658 | val = dp->dccps_qpolicy; | ||
659 | break; | ||
660 | case DCCP_SOCKOPT_QPOLICY_TXQLEN: | ||
661 | val = dp->dccps_tx_qlen; | ||
662 | break; | ||
642 | case 128 ... 191: | 663 | case 128 ... 191: |
643 | return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname, | 664 | return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname, |
644 | len, (u32 __user *)optval, optlen); | 665 | len, (u32 __user *)optval, optlen); |
@@ -681,6 +702,43 @@ int compat_dccp_getsockopt(struct sock *sk, int level, int optname, | |||
681 | EXPORT_SYMBOL_GPL(compat_dccp_getsockopt); | 702 | EXPORT_SYMBOL_GPL(compat_dccp_getsockopt); |
682 | #endif | 703 | #endif |
683 | 704 | ||
705 | static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb) | ||
706 | { | ||
707 | struct cmsghdr *cmsg = CMSG_FIRSTHDR(msg); | ||
708 | |||
709 | /* | ||
710 | * Assign an (opaque) qpolicy priority value to skb->priority. | ||
711 | * | ||
712 | * We are overloading this skb field for use with the qpolicy subsystem. | ||
713 | * The skb->priority is normally used for the SO_PRIORITY option, which | ||
714 | * is initialised from sk_priority. Since the assignment of sk_priority | ||
715 | * to skb->priority happens later (on layer 3), we overload this field | ||
716 | * for use with queueing priorities as long as the skb is on layer 4. | ||
717 | * The default priority value (if nothing is set) is 0. | ||
718 | */ | ||
719 | skb->priority = 0; | ||
720 | |||
721 | for (; cmsg != NULL; cmsg = CMSG_NXTHDR(msg, cmsg)) { | ||
722 | |||
723 | if (!CMSG_OK(msg, cmsg)) | ||
724 | return -EINVAL; | ||
725 | |||
726 | if (cmsg->cmsg_level != SOL_DCCP) | ||
727 | continue; | ||
728 | |||
729 | switch (cmsg->cmsg_type) { | ||
730 | case DCCP_SCM_PRIORITY: | ||
731 | if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32))) | ||
732 | return -EINVAL; | ||
733 | skb->priority = *(__u32 *)CMSG_DATA(cmsg); | ||
734 | break; | ||
735 | default: | ||
736 | return -EINVAL; | ||
737 | } | ||
738 | } | ||
739 | return 0; | ||
740 | } | ||
741 | |||
684 | int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | 742 | int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, |
685 | size_t len) | 743 | size_t len) |
686 | { | 744 | { |
@@ -696,8 +754,7 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
696 | 754 | ||
697 | lock_sock(sk); | 755 | lock_sock(sk); |
698 | 756 | ||
699 | if (sysctl_dccp_tx_qlen && | 757 | if (dccp_qpolicy_full(sk)) { |
700 | (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) { | ||
701 | rc = -EAGAIN; | 758 | rc = -EAGAIN; |
702 | goto out_release; | 759 | goto out_release; |
703 | } | 760 | } |
@@ -725,7 +782,11 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
725 | if (rc != 0) | 782 | if (rc != 0) |
726 | goto out_discard; | 783 | goto out_discard; |
727 | 784 | ||
728 | skb_queue_tail(&sk->sk_write_queue, skb); | 785 | rc = dccp_msghdr_parse(msg, skb); |
786 | if (rc != 0) | ||
787 | goto out_discard; | ||
788 | |||
789 | dccp_qpolicy_push(sk, skb); | ||
729 | /* | 790 | /* |
730 | * The xmit_timer is set if the TX CCID is rate-based and will expire | 791 | * The xmit_timer is set if the TX CCID is rate-based and will expire |
731 | * when congestion control permits to release further packets into the | 792 | * when congestion control permits to release further packets into the |
diff --git a/net/dccp/qpolicy.c b/net/dccp/qpolicy.c new file mode 100644 index 00000000000..4b0fd6b11f6 --- /dev/null +++ b/net/dccp/qpolicy.c | |||
@@ -0,0 +1,126 @@ | |||
1 | /* | ||
2 | * net/dccp/qpolicy.c | ||
3 | * | ||
4 | * Policy-based packet dequeueing interface for DCCP. | ||
5 | * | ||
6 | * Copyright (c) 2008 Tomasz Grobelny <tomasz@grobelny.oswiecenia.net> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License v2 | ||
10 | * as published by the Free Software Foundation. | ||
11 | */ | ||
12 | #include "dccp.h" | ||
13 | |||
14 | /* | ||
15 | * Simple Dequeueing Policy: | ||
16 | * If tx_qlen is different from 0, enqueue up to tx_qlen elements. | ||
17 | */ | ||
18 | static void qpolicy_simple_push(struct sock *sk, struct sk_buff *skb) | ||
19 | { | ||
20 | skb_queue_tail(&sk->sk_write_queue, skb); | ||
21 | } | ||
22 | |||
23 | static bool qpolicy_simple_full(struct sock *sk) | ||
24 | { | ||
25 | return dccp_sk(sk)->dccps_tx_qlen && | ||
26 | sk->sk_write_queue.qlen >= dccp_sk(sk)->dccps_tx_qlen; | ||
27 | } | ||
28 | |||
29 | static struct sk_buff *qpolicy_simple_top(struct sock *sk) | ||
30 | { | ||
31 | return skb_peek(&sk->sk_write_queue); | ||
32 | } | ||
33 | |||
34 | /* | ||
35 | * Priority-based Dequeueing Policy: | ||
36 | * If tx_qlen is different from 0 and the queue has reached its upper bound | ||
37 | * of tx_qlen elements, replace older packets lowest-priority-first. | ||
38 | */ | ||
39 | static struct sk_buff *qpolicy_prio_best_skb(struct sock *sk) | ||
40 | { | ||
41 | struct sk_buff *skb, *best = NULL; | ||
42 | |||
43 | skb_queue_walk(&sk->sk_write_queue, skb) | ||
44 | if (best == NULL || skb->priority > best->priority) | ||
45 | best = skb; | ||
46 | return best; | ||
47 | } | ||
48 | |||
49 | static struct sk_buff *qpolicy_prio_worst_skb(struct sock *sk) | ||
50 | { | ||
51 | struct sk_buff *skb, *worst = NULL; | ||
52 | |||
53 | skb_queue_walk(&sk->sk_write_queue, skb) | ||
54 | if (worst == NULL || skb->priority < worst->priority) | ||
55 | worst = skb; | ||
56 | return worst; | ||
57 | } | ||
58 | |||
59 | static bool qpolicy_prio_full(struct sock *sk) | ||
60 | { | ||
61 | if (qpolicy_simple_full(sk)) | ||
62 | dccp_qpolicy_drop(sk, qpolicy_prio_worst_skb(sk)); | ||
63 | return false; | ||
64 | } | ||
65 | |||
66 | /** | ||
67 | * struct dccp_qpolicy_operations - TX Packet Dequeueing Interface | ||
68 | * @push: add a new @skb to the write queue | ||
69 | * @full: indicates that no more packets will be admitted | ||
70 | * @top: peeks at whatever the queueing policy defines as its `top' | ||
71 | */ | ||
72 | static struct dccp_qpolicy_operations { | ||
73 | void (*push) (struct sock *sk, struct sk_buff *skb); | ||
74 | bool (*full) (struct sock *sk); | ||
75 | struct sk_buff* (*top) (struct sock *sk); | ||
76 | |||
77 | } qpol_table[DCCPQ_POLICY_MAX] = { | ||
78 | [DCCPQ_POLICY_SIMPLE] = { | ||
79 | .push = qpolicy_simple_push, | ||
80 | .full = qpolicy_simple_full, | ||
81 | .top = qpolicy_simple_top, | ||
82 | }, | ||
83 | [DCCPQ_POLICY_PRIO] = { | ||
84 | .push = qpolicy_simple_push, | ||
85 | .full = qpolicy_prio_full, | ||
86 | .top = qpolicy_prio_best_skb, | ||
87 | }, | ||
88 | }; | ||
89 | |||
90 | /* | ||
91 | * Externally visible interface | ||
92 | */ | ||
93 | void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb) | ||
94 | { | ||
95 | qpol_table[dccp_sk(sk)->dccps_qpolicy].push(sk, skb); | ||
96 | } | ||
97 | |||
98 | bool dccp_qpolicy_full(struct sock *sk) | ||
99 | { | ||
100 | return qpol_table[dccp_sk(sk)->dccps_qpolicy].full(sk); | ||
101 | } | ||
102 | |||
103 | void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb) | ||
104 | { | ||
105 | if (skb != NULL) { | ||
106 | skb_unlink(skb, &sk->sk_write_queue); | ||
107 | kfree_skb(skb); | ||
108 | } | ||
109 | } | ||
110 | |||
111 | struct sk_buff *dccp_qpolicy_top(struct sock *sk) | ||
112 | { | ||
113 | return qpol_table[dccp_sk(sk)->dccps_qpolicy].top(sk); | ||
114 | } | ||
115 | |||
116 | struct sk_buff *dccp_qpolicy_pop(struct sock *sk) | ||
117 | { | ||
118 | struct sk_buff *skb = dccp_qpolicy_top(sk); | ||
119 | |||
120 | if (skb != NULL) { | ||
121 | /* Clear any skb fields that we used internally */ | ||
122 | skb->priority = 0; | ||
123 | skb_unlink(skb, &sk->sk_write_queue); | ||
124 | } | ||
125 | return skb; | ||
126 | } | ||