about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: David S. Miller <davem@davemloft.net> 2010-12-08 13:01:00 -0500
committer: David S. Miller <davem@davemloft.net> 2010-12-08 13:01:00 -0500
commit: a2d4b65d477aad1fe8c7218781a031fa9cf5abfc (patch)
tree: 940fa997e1a2897999bf92361e80162d6bdbbaad
parent: 01b0c5cfb23f19837650aa53495ace6d0fd7d3f8 (diff)
parent: 04910265078f08a73208beab70ed2a3cce4a919f (diff)
Merge branch 'dccp' of git://eden-feed.erg.abdn.ac.uk/net-next-2.6
-rw-r--r-- Documentation/networking/dccp.txt 20
-rw-r--r-- include/linux/dccp.h 21
-rw-r--r-- net/dccp/Makefile 4
-rw-r--r-- net/dccp/dccp.h 13
-rw-r--r-- net/dccp/output.c 7
-rw-r--r-- net/dccp/proto.c 71
-rw-r--r-- net/dccp/qpolicy.c 137
7 files changed, 264 insertions, 9 deletions
diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt
index 271d524a4c8d..b395ca6a49f2 100644
--- a/Documentation/networking/dccp.txt
+++ b/Documentation/networking/dccp.txt
@@ -47,6 +47,26 @@ http://linux-net.osdl.org/index.php/DCCP_Testing#Experimental_DCCP_source_tree
47 47
48Socket options 48Socket options
49============== 49==============
50DCCP_SOCKOPT_QPOLICY_ID sets the dequeuing policy for outgoing packets. It takes
51a policy ID as argument and can only be set before the connection is established (i.e. changes
52during an established connection are not supported). Currently, two policies are
53defined: the "simple" policy (DCCPQ_POLICY_SIMPLE), which does nothing special,
54and a priority-based variant (DCCPQ_POLICY_PRIO). The latter allows passing a
55u32 priority value as ancillary data to sendmsg(), where higher numbers indicate
56a higher packet priority (similar to SO_PRIORITY). This ancillary data needs to
57be formatted using a cmsg(3) message header filled in as follows:
58 cmsg->cmsg_level = SOL_DCCP;
59 cmsg->cmsg_type = DCCP_SCM_PRIORITY;
60 cmsg->cmsg_len = CMSG_LEN(sizeof(uint32_t)); /* or CMSG_LEN(4) */
61
62DCCP_SOCKOPT_QPOLICY_TXQLEN sets the maximum length of the output queue. A zero
63value is always interpreted as unbounded queue length. If different from zero,
64the interpretation of this parameter depends on the current dequeuing policy
65(see above): the "simple" policy will enforce a fixed queue size by returning
66EAGAIN, whereas the "prio" policy enforces a fixed queue length by dropping the
67lowest-priority packet first. The default value for this parameter is
68initialised from /proc/sys/net/dccp/default/tx_qlen.
69
50DCCP_SOCKOPT_SERVICE sets the service. The specification mandates use of 70DCCP_SOCKOPT_SERVICE sets the service. The specification mandates use of
51service codes (RFC 4340, sec. 8.1.2); if this socket option is not set, 71service codes (RFC 4340, sec. 8.1.2); if this socket option is not set,
52the socket will fall back to 0 (which means that no meaningful service code 72the socket will fall back to 0 (which means that no meaningful service code
diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index eed52bcd35d0..010e2d87ed75 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -197,6 +197,21 @@ enum dccp_feature_numbers {
197 DCCPF_MAX_CCID_SPECIFIC = 255, 197 DCCPF_MAX_CCID_SPECIFIC = 255,
198}; 198};
199 199
200/* DCCP socket control message types for cmsg */
201enum dccp_cmsg_type {
202 DCCP_SCM_PRIORITY = 1,
203 DCCP_SCM_QPOLICY_MAX = 0xFFFF,
204 /* ^-- Up to here reserved exclusively for qpolicy parameters */
205 DCCP_SCM_MAX
206};
207
208/* DCCP dequeueing policies for outgoing/queued packets */
209enum dccp_packet_dequeueing_policy {
210 DCCPQ_POLICY_SIMPLE,
211 DCCPQ_POLICY_PRIO,
212 DCCPQ_POLICY_MAX
213};
214
200/* DCCP socket options */ 215/* DCCP socket options */
201#define DCCP_SOCKOPT_PACKET_SIZE 1 /* XXX deprecated, without effect */ 216#define DCCP_SOCKOPT_PACKET_SIZE 1 /* XXX deprecated, without effect */
202#define DCCP_SOCKOPT_SERVICE 2 217#define DCCP_SOCKOPT_SERVICE 2
@@ -210,6 +225,8 @@ enum dccp_feature_numbers {
210#define DCCP_SOCKOPT_CCID 13 225#define DCCP_SOCKOPT_CCID 13
211#define DCCP_SOCKOPT_TX_CCID 14 226#define DCCP_SOCKOPT_TX_CCID 14
212#define DCCP_SOCKOPT_RX_CCID 15 227#define DCCP_SOCKOPT_RX_CCID 15
228#define DCCP_SOCKOPT_QPOLICY_ID 16
229#define DCCP_SOCKOPT_QPOLICY_TXQLEN 17
213#define DCCP_SOCKOPT_CCID_RX_INFO 128 230#define DCCP_SOCKOPT_CCID_RX_INFO 128
214#define DCCP_SOCKOPT_CCID_TX_INFO 192 231#define DCCP_SOCKOPT_CCID_TX_INFO 192
215 232
@@ -458,6 +475,8 @@ struct dccp_ackvec;
458 * @dccps_hc_rx_ccid - CCID used for the receiver (or receiving half-connection) 475 * @dccps_hc_rx_ccid - CCID used for the receiver (or receiving half-connection)
459 * @dccps_hc_tx_ccid - CCID used for the sender (or sending half-connection) 476 * @dccps_hc_tx_ccid - CCID used for the sender (or sending half-connection)
460 * @dccps_options_received - parsed set of retrieved options 477 * @dccps_options_received - parsed set of retrieved options
478 * @dccps_qpolicy - TX dequeueing policy, one of %dccp_packet_dequeueing_policy
479 * @dccps_tx_qlen - maximum length of the TX queue
461 * @dccps_role - role of this sock, one of %dccp_role 480 * @dccps_role - role of this sock, one of %dccp_role
462 * @dccps_hc_rx_insert_options - receiver wants to add options when acking 481 * @dccps_hc_rx_insert_options - receiver wants to add options when acking
463 * @dccps_hc_tx_insert_options - sender wants to add options when sending 482 * @dccps_hc_tx_insert_options - sender wants to add options when sending
@@ -500,6 +519,8 @@ struct dccp_sock {
500 struct ccid *dccps_hc_rx_ccid; 519 struct ccid *dccps_hc_rx_ccid;
501 struct ccid *dccps_hc_tx_ccid; 520 struct ccid *dccps_hc_tx_ccid;
502 struct dccp_options_received dccps_options_received; 521 struct dccp_options_received dccps_options_received;
522 __u8 dccps_qpolicy;
523 __u32 dccps_tx_qlen;
503 enum dccp_role dccps_role:2; 524 enum dccp_role dccps_role:2;
504 __u8 dccps_hc_rx_insert_options:1; 525 __u8 dccps_hc_rx_insert_options:1;
505 __u8 dccps_hc_tx_insert_options:1; 526 __u8 dccps_hc_tx_insert_options:1;
diff --git a/net/dccp/Makefile b/net/dccp/Makefile
index 2991efcc8dea..5c8362b037ed 100644
--- a/net/dccp/Makefile
+++ b/net/dccp/Makefile
@@ -1,7 +1,7 @@
1obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o 1obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o
2 2
3dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o 3dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o \
4 4 qpolicy.o
5# 5#
6# CCID algorithms to be used by dccp.ko 6# CCID algorithms to be used by dccp.ko
7# 7#
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 19fafd597465..48ad5d9da7cb 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -243,6 +243,19 @@ extern void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
243extern void dccp_send_sync(struct sock *sk, const u64 seq, 243extern void dccp_send_sync(struct sock *sk, const u64 seq,
244 const enum dccp_pkt_type pkt_type); 244 const enum dccp_pkt_type pkt_type);
245 245
246/*
247 * TX Packet Dequeueing Interface
248 */
249extern void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb);
250extern bool dccp_qpolicy_full(struct sock *sk);
251extern void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb);
252extern struct sk_buff *dccp_qpolicy_top(struct sock *sk);
253extern struct sk_buff *dccp_qpolicy_pop(struct sock *sk);
254extern bool dccp_qpolicy_param_ok(struct sock *sk, __be32 param);
255
256/*
257 * TX Packet Output and TX Timers
258 */
246extern void dccp_write_xmit(struct sock *sk); 259extern void dccp_write_xmit(struct sock *sk);
247extern void dccp_write_space(struct sock *sk); 260extern void dccp_write_space(struct sock *sk);
248extern void dccp_flush_write_queue(struct sock *sk, long *time_budget); 261extern void dccp_flush_write_queue(struct sock *sk, long *time_budget);
diff --git a/net/dccp/output.c b/net/dccp/output.c
index d96dd9d362ae..784d30210543 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -242,7 +242,7 @@ static void dccp_xmit_packet(struct sock *sk)
242{ 242{
243 int err, len; 243 int err, len;
244 struct dccp_sock *dp = dccp_sk(sk); 244 struct dccp_sock *dp = dccp_sk(sk);
245 struct sk_buff *skb = skb_dequeue(&sk->sk_write_queue); 245 struct sk_buff *skb = dccp_qpolicy_pop(sk);
246 246
247 if (unlikely(skb == NULL)) 247 if (unlikely(skb == NULL))
248 return; 248 return;
@@ -345,7 +345,7 @@ void dccp_write_xmit(struct sock *sk)
345 struct dccp_sock *dp = dccp_sk(sk); 345 struct dccp_sock *dp = dccp_sk(sk);
346 struct sk_buff *skb; 346 struct sk_buff *skb;
347 347
348 while ((skb = skb_peek(&sk->sk_write_queue))) { 348 while ((skb = dccp_qpolicy_top(sk))) {
349 int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); 349 int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
350 350
351 switch (ccid_packet_dequeue_eval(rc)) { 351 switch (ccid_packet_dequeue_eval(rc)) {
@@ -359,8 +359,7 @@ void dccp_write_xmit(struct sock *sk)
359 dccp_xmit_packet(sk); 359 dccp_xmit_packet(sk);
360 break; 360 break;
361 case CCID_PACKET_ERR: 361 case CCID_PACKET_ERR:
362 skb_dequeue(&sk->sk_write_queue); 362 dccp_qpolicy_drop(sk, skb);
363 kfree_skb(skb);
364 dccp_pr_debug("packet discarded due to err=%d\n", rc); 363 dccp_pr_debug("packet discarded due to err=%d\n", rc);
365 } 364 }
366 } 365 }
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index ef343d53fcea..152975d942d9 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -185,6 +185,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
185 dp->dccps_role = DCCP_ROLE_UNDEFINED; 185 dp->dccps_role = DCCP_ROLE_UNDEFINED;
186 dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT; 186 dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT;
187 dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1; 187 dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1;
188 dp->dccps_tx_qlen = sysctl_dccp_tx_qlen;
188 189
189 dccp_init_xmit_timers(sk); 190 dccp_init_xmit_timers(sk);
190 191
@@ -532,6 +533,20 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
532 case DCCP_SOCKOPT_RECV_CSCOV: 533 case DCCP_SOCKOPT_RECV_CSCOV:
533 err = dccp_setsockopt_cscov(sk, val, true); 534 err = dccp_setsockopt_cscov(sk, val, true);
534 break; 535 break;
536 case DCCP_SOCKOPT_QPOLICY_ID:
537 if (sk->sk_state != DCCP_CLOSED)
538 err = -EISCONN;
539 else if (val < 0 || val >= DCCPQ_POLICY_MAX)
540 err = -EINVAL;
541 else
542 dp->dccps_qpolicy = val;
543 break;
544 case DCCP_SOCKOPT_QPOLICY_TXQLEN:
545 if (val < 0)
546 err = -EINVAL;
547 else
548 dp->dccps_tx_qlen = val;
549 break;
535 default: 550 default:
536 err = -ENOPROTOOPT; 551 err = -ENOPROTOOPT;
537 break; 552 break;
@@ -639,6 +654,12 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
639 case DCCP_SOCKOPT_RECV_CSCOV: 654 case DCCP_SOCKOPT_RECV_CSCOV:
640 val = dp->dccps_pcrlen; 655 val = dp->dccps_pcrlen;
641 break; 656 break;
657 case DCCP_SOCKOPT_QPOLICY_ID:
658 val = dp->dccps_qpolicy;
659 break;
660 case DCCP_SOCKOPT_QPOLICY_TXQLEN:
661 val = dp->dccps_tx_qlen;
662 break;
642 case 128 ... 191: 663 case 128 ... 191:
643 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname, 664 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
644 len, (u32 __user *)optval, optlen); 665 len, (u32 __user *)optval, optlen);
@@ -681,6 +702,47 @@ int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
681EXPORT_SYMBOL_GPL(compat_dccp_getsockopt); 702EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
682#endif 703#endif
683 704
705static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
706{
707 struct cmsghdr *cmsg = CMSG_FIRSTHDR(msg);
708
709 /*
710 * Assign an (opaque) qpolicy priority value to skb->priority.
711 *
712 * We are overloading this skb field for use with the qpolicy subsystem.
713 * The skb->priority is normally used for the SO_PRIORITY option, which
714 * is initialised from sk_priority. Since the assignment of sk_priority
715 * to skb->priority happens later (on layer 3), we overload this field
716 * for use with queueing priorities as long as the skb is on layer 4.
717 * The default priority value (if nothing is set) is 0.
718 */
719 skb->priority = 0;
720
721 for (; cmsg != NULL; cmsg = CMSG_NXTHDR(msg, cmsg)) {
722
723 if (!CMSG_OK(msg, cmsg))
724 return -EINVAL;
725
726 if (cmsg->cmsg_level != SOL_DCCP)
727 continue;
728
729 if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX &&
730 !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type))
731 return -EINVAL;
732
733 switch (cmsg->cmsg_type) {
734 case DCCP_SCM_PRIORITY:
735 if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
736 return -EINVAL;
737 skb->priority = *(__u32 *)CMSG_DATA(cmsg);
738 break;
739 default:
740 return -EINVAL;
741 }
742 }
743 return 0;
744}
745
684int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, 746int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
685 size_t len) 747 size_t len)
686{ 748{
@@ -696,8 +758,7 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
696 758
697 lock_sock(sk); 759 lock_sock(sk);
698 760
699 if (sysctl_dccp_tx_qlen && 761 if (dccp_qpolicy_full(sk)) {
700 (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
701 rc = -EAGAIN; 762 rc = -EAGAIN;
702 goto out_release; 763 goto out_release;
703 } 764 }
@@ -725,7 +786,11 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
725 if (rc != 0) 786 if (rc != 0)
726 goto out_discard; 787 goto out_discard;
727 788
728 skb_queue_tail(&sk->sk_write_queue, skb); 789 rc = dccp_msghdr_parse(msg, skb);
790 if (rc != 0)
791 goto out_discard;
792
793 dccp_qpolicy_push(sk, skb);
729 /* 794 /*
730 * The xmit_timer is set if the TX CCID is rate-based and will expire 795 * The xmit_timer is set if the TX CCID is rate-based and will expire
731 * when congestion control permits to release further packets into the 796 * when congestion control permits to release further packets into the
diff --git a/net/dccp/qpolicy.c b/net/dccp/qpolicy.c
new file mode 100644
index 000000000000..63c30bfa4703
--- /dev/null
+++ b/net/dccp/qpolicy.c
@@ -0,0 +1,137 @@
1/*
2 * net/dccp/qpolicy.c
3 *
4 * Policy-based packet dequeueing interface for DCCP.
5 *
6 * Copyright (c) 2008 Tomasz Grobelny <tomasz@grobelny.oswiecenia.net>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License v2
10 * as published by the Free Software Foundation.
11 */
12#include "dccp.h"
13
14/*
15 * Simple Dequeueing Policy:
16 * If tx_qlen is different from 0, enqueue up to tx_qlen elements.
17 */
18static void qpolicy_simple_push(struct sock *sk, struct sk_buff *skb)
19{
20 skb_queue_tail(&sk->sk_write_queue, skb);
21}
22
23static bool qpolicy_simple_full(struct sock *sk)
24{
25 return dccp_sk(sk)->dccps_tx_qlen &&
26 sk->sk_write_queue.qlen >= dccp_sk(sk)->dccps_tx_qlen;
27}
28
29static struct sk_buff *qpolicy_simple_top(struct sock *sk)
30{
31 return skb_peek(&sk->sk_write_queue);
32}
33
34/*
35 * Priority-based Dequeueing Policy:
36 * If tx_qlen is different from 0 and the queue has reached its upper bound
37 * of tx_qlen elements, replace older packets lowest-priority-first.
38 */
39static struct sk_buff *qpolicy_prio_best_skb(struct sock *sk)
40{
41 struct sk_buff *skb, *best = NULL;
42
43 skb_queue_walk(&sk->sk_write_queue, skb)
44 if (best == NULL || skb->priority > best->priority)
45 best = skb;
46 return best;
47}
48
49static struct sk_buff *qpolicy_prio_worst_skb(struct sock *sk)
50{
51 struct sk_buff *skb, *worst = NULL;
52
53 skb_queue_walk(&sk->sk_write_queue, skb)
54 if (worst == NULL || skb->priority < worst->priority)
55 worst = skb;
56 return worst;
57}
58
59static bool qpolicy_prio_full(struct sock *sk)
60{
61 if (qpolicy_simple_full(sk))
62 dccp_qpolicy_drop(sk, qpolicy_prio_worst_skb(sk));
63 return false;
64}
65
66/**
67 * struct dccp_qpolicy_operations - TX Packet Dequeueing Interface
68 * @push: add a new @skb to the write queue
69 * @full: indicates that no more packets will be admitted
70 * @top: peeks at whatever the queueing policy defines as its `top'
71 */
72static struct dccp_qpolicy_operations {
73 void (*push) (struct sock *sk, struct sk_buff *skb);
74 bool (*full) (struct sock *sk);
75 struct sk_buff* (*top) (struct sock *sk);
76 __be32 params;
77
78} qpol_table[DCCPQ_POLICY_MAX] = {
79 [DCCPQ_POLICY_SIMPLE] = {
80 .push = qpolicy_simple_push,
81 .full = qpolicy_simple_full,
82 .top = qpolicy_simple_top,
83 .params = 0,
84 },
85 [DCCPQ_POLICY_PRIO] = {
86 .push = qpolicy_simple_push,
87 .full = qpolicy_prio_full,
88 .top = qpolicy_prio_best_skb,
89 .params = DCCP_SCM_PRIORITY,
90 },
91};
92
93/*
94 * Externally visible interface
95 */
96void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb)
97{
98 qpol_table[dccp_sk(sk)->dccps_qpolicy].push(sk, skb);
99}
100
101bool dccp_qpolicy_full(struct sock *sk)
102{
103 return qpol_table[dccp_sk(sk)->dccps_qpolicy].full(sk);
104}
105
106void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb)
107{
108 if (skb != NULL) {
109 skb_unlink(skb, &sk->sk_write_queue);
110 kfree_skb(skb);
111 }
112}
113
114struct sk_buff *dccp_qpolicy_top(struct sock *sk)
115{
116 return qpol_table[dccp_sk(sk)->dccps_qpolicy].top(sk);
117}
118
119struct sk_buff *dccp_qpolicy_pop(struct sock *sk)
120{
121 struct sk_buff *skb = dccp_qpolicy_top(sk);
122
123 if (skb != NULL) {
124 /* Clear any skb fields that we used internally */
125 skb->priority = 0;
126 skb_unlink(skb, &sk->sk_write_queue);
127 }
128 return skb;
129}
130
131bool dccp_qpolicy_param_ok(struct sock *sk, __be32 param)
132{
133 /* check if exactly one bit is set */
134 if (!param || (param & (param - 1)))
135 return false;
136 return (qpol_table[dccp_sk(sk)->dccps_qpolicy].params & param) == param;
137}