aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@g5.osdl.org>2005-09-09 17:25:22 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2005-09-09 17:25:22 -0400
commit1d8674edb534a3c5cb549bfde5a39fa5598cb3bc (patch)
tree2fe4a39c0d4a26400d8b89e800ba596f1de88ec7
parentd99901d6fdfb4098b9996de89ffbbae890e08288 (diff)
parentcb7b593c2c808b32a1ea188599713c434b95f849 (diff)
Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
-rw-r--r--include/linux/dccp.h3
-rw-r--r--net/dccp/ccids/ccid3.c163
-rw-r--r--net/dccp/ccids/ccid3.h14
-rw-r--r--net/dccp/ccids/lib/packet_history.h3
-rw-r--r--net/dccp/dccp.h16
-rw-r--r--net/dccp/input.c4
-rw-r--r--net/dccp/ipv4.c1
-rw-r--r--net/dccp/minisocks.c1
-rw-r--r--net/dccp/options.c90
-rw-r--r--net/ipv4/af_inet.c13
-rw-r--r--net/ipv4/fib_trie.c804
11 files changed, 557 insertions, 555 deletions
diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 007c290f74d..8bf4bacb505 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -432,7 +432,10 @@ struct dccp_sock {
432 struct ccid *dccps_hc_rx_ccid; 432 struct ccid *dccps_hc_rx_ccid;
433 struct ccid *dccps_hc_tx_ccid; 433 struct ccid *dccps_hc_tx_ccid;
434 struct dccp_options_received dccps_options_received; 434 struct dccp_options_received dccps_options_received;
435 struct timeval dccps_epoch;
435 enum dccp_role dccps_role:2; 436 enum dccp_role dccps_role:2;
437 __u8 dccps_hc_rx_insert_options:1;
438 __u8 dccps_hc_tx_insert_options:1;
436}; 439};
437 440
438static inline struct dccp_sock *dccp_sk(const struct sock *sk) 441static inline struct dccp_sock *dccp_sk(const struct sock *sk)
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 7bf3b3a91e9..ea30012dd19 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -43,12 +43,22 @@
43#include "ccid3.h" 43#include "ccid3.h"
44 44
45/* 45/*
46 * Reason for maths with 10 here is to avoid 32 bit overflow when a is big. 46 * Reason for maths here is to avoid 32 bit overflow when a is big.
47 * With this we get close to the limit.
47 */ 48 */
48static inline u32 usecs_div(const u32 a, const u32 b) 49static inline u32 usecs_div(const u32 a, const u32 b)
49{ 50{
50 const u32 tmp = a * (USEC_PER_SEC / 10); 51 const u32 div = a < (UINT_MAX / (USEC_PER_SEC / 10)) ? 10 :
51 return b > 20 ? tmp / (b / 10) : tmp; 52 a < (UINT_MAX / (USEC_PER_SEC / 50)) ? 50 :
53 a < (UINT_MAX / (USEC_PER_SEC / 100)) ? 100 :
54 a < (UINT_MAX / (USEC_PER_SEC / 500)) ? 500 :
55 a < (UINT_MAX / (USEC_PER_SEC / 1000)) ? 1000 :
56 a < (UINT_MAX / (USEC_PER_SEC / 5000)) ? 5000 :
57 a < (UINT_MAX / (USEC_PER_SEC / 10000)) ? 10000 :
58 a < (UINT_MAX / (USEC_PER_SEC / 50000)) ? 50000 :
59 100000;
60 const u32 tmp = a * (USEC_PER_SEC / div);
61 return (b >= 2 * div) ? tmp / (b / div) : tmp;
52} 62}
53 63
54static int ccid3_debug; 64static int ccid3_debug;
@@ -102,8 +112,7 @@ static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state)
102static inline void ccid3_hc_tx_set_state(struct sock *sk, 112static inline void ccid3_hc_tx_set_state(struct sock *sk,
103 enum ccid3_hc_tx_states state) 113 enum ccid3_hc_tx_states state)
104{ 114{
105 struct dccp_sock *dp = dccp_sk(sk); 115 struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
106 struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
107 enum ccid3_hc_tx_states oldstate = hctx->ccid3hctx_state; 116 enum ccid3_hc_tx_states oldstate = hctx->ccid3hctx_state;
108 117
109 ccid3_pr_debug("%s(%p) %-8.8s -> %s\n", 118 ccid3_pr_debug("%s(%p) %-8.8s -> %s\n",
@@ -144,8 +153,7 @@ static inline void ccid3_calc_new_delta(struct ccid3_hc_tx_sock *hctx)
144 */ 153 */
145static void ccid3_hc_tx_update_x(struct sock *sk) 154static void ccid3_hc_tx_update_x(struct sock *sk)
146{ 155{
147 struct dccp_sock *dp = dccp_sk(sk); 156 struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
148 struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
149 157
150 /* To avoid large error in calcX */ 158 /* To avoid large error in calcX */
151 if (hctx->ccid3hctx_p >= TFRC_SMALLEST_P) { 159 if (hctx->ccid3hctx_p >= TFRC_SMALLEST_P) {
@@ -159,7 +167,7 @@ static void ccid3_hc_tx_update_x(struct sock *sk)
159 } else { 167 } else {
160 struct timeval now; 168 struct timeval now;
161 169
162 do_gettimeofday(&now); 170 dccp_timestamp(sk, &now);
163 if (timeval_delta(&now, &hctx->ccid3hctx_t_ld) >= 171 if (timeval_delta(&now, &hctx->ccid3hctx_t_ld) >=
164 hctx->ccid3hctx_rtt) { 172 hctx->ccid3hctx_rtt) {
165 hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_recv, 173 hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_recv,
@@ -174,9 +182,8 @@ static void ccid3_hc_tx_update_x(struct sock *sk)
174static void ccid3_hc_tx_no_feedback_timer(unsigned long data) 182static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
175{ 183{
176 struct sock *sk = (struct sock *)data; 184 struct sock *sk = (struct sock *)data;
177 struct dccp_sock *dp = dccp_sk(sk);
178 unsigned long next_tmout = 0; 185 unsigned long next_tmout = 0;
179 struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; 186 struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
180 187
181 bh_lock_sock(sk); 188 bh_lock_sock(sk);
182 if (sock_owned_by_user(sk)) { 189 if (sock_owned_by_user(sk)) {
@@ -274,7 +281,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk,
274 struct sk_buff *skb, int len) 281 struct sk_buff *skb, int len)
275{ 282{
276 struct dccp_sock *dp = dccp_sk(sk); 283 struct dccp_sock *dp = dccp_sk(sk);
277 struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; 284 struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
278 struct dccp_tx_hist_entry *new_packet; 285 struct dccp_tx_hist_entry *new_packet;
279 struct timeval now; 286 struct timeval now;
280 long delay; 287 long delay;
@@ -307,7 +314,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk,
307 dccp_tx_hist_add_entry(&hctx->ccid3hctx_hist, new_packet); 314 dccp_tx_hist_add_entry(&hctx->ccid3hctx_hist, new_packet);
308 } 315 }
309 316
310 do_gettimeofday(&now); 317 dccp_timestamp(sk, &now);
311 318
312 switch (hctx->ccid3hctx_state) { 319 switch (hctx->ccid3hctx_state) {
313 case TFRC_SSTATE_NO_SENT: 320 case TFRC_SSTATE_NO_SENT:
@@ -348,18 +355,20 @@ static int ccid3_hc_tx_send_packet(struct sock *sk,
348 } 355 }
349 356
350 /* Can we send? if so add options and add to packet history */ 357 /* Can we send? if so add options and add to packet history */
351 if (rc == 0) 358 if (rc == 0) {
359 dp->dccps_hc_tx_insert_options = 1;
352 new_packet->dccphtx_ccval = 360 new_packet->dccphtx_ccval =
353 DCCP_SKB_CB(skb)->dccpd_ccval = 361 DCCP_SKB_CB(skb)->dccpd_ccval =
354 hctx->ccid3hctx_last_win_count; 362 hctx->ccid3hctx_last_win_count;
363 }
355out: 364out:
356 return rc; 365 return rc;
357} 366}
358 367
359static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) 368static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len)
360{ 369{
361 struct dccp_sock *dp = dccp_sk(sk); 370 const struct dccp_sock *dp = dccp_sk(sk);
362 struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; 371 struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
363 struct timeval now; 372 struct timeval now;
364 373
365 BUG_ON(hctx == NULL); 374 BUG_ON(hctx == NULL);
@@ -370,7 +379,7 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len)
370 return; 379 return;
371 } 380 }
372 381
373 do_gettimeofday(&now); 382 dccp_timestamp(sk, &now);
374 383
375 /* check if we have sent a data packet */ 384 /* check if we have sent a data packet */
376 if (len > 0) { 385 if (len > 0) {
@@ -445,10 +454,11 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len)
445 454
446static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) 455static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
447{ 456{
448 struct dccp_sock *dp = dccp_sk(sk); 457 const struct dccp_sock *dp = dccp_sk(sk);
449 struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; 458 struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
450 struct ccid3_options_received *opt_recv; 459 struct ccid3_options_received *opt_recv;
451 struct dccp_tx_hist_entry *packet; 460 struct dccp_tx_hist_entry *packet;
461 struct timeval now;
452 unsigned long next_tmout; 462 unsigned long next_tmout;
453 u32 t_elapsed; 463 u32 t_elapsed;
454 u32 pinv; 464 u32 pinv;
@@ -471,7 +481,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
471 481
472 opt_recv = &hctx->ccid3hctx_options_received; 482 opt_recv = &hctx->ccid3hctx_options_received;
473 483
474 t_elapsed = dp->dccps_options_received.dccpor_elapsed_time; 484 t_elapsed = dp->dccps_options_received.dccpor_elapsed_time * 10;
475 x_recv = opt_recv->ccid3or_receive_rate; 485 x_recv = opt_recv->ccid3or_receive_rate;
476 pinv = opt_recv->ccid3or_loss_event_rate; 486 pinv = opt_recv->ccid3or_loss_event_rate;
477 487
@@ -496,9 +506,14 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
496 } 506 }
497 507
498 /* Update RTT */ 508 /* Update RTT */
499 r_sample = timeval_now_delta(&packet->dccphtx_tstamp); 509 dccp_timestamp(sk, &now);
500 /* FIXME: */ 510 r_sample = timeval_delta(&now, &packet->dccphtx_tstamp);
501 // r_sample -= usecs_to_jiffies(t_elapsed * 10); 511 if (unlikely(r_sample <= t_elapsed))
512 LIMIT_NETDEBUG(KERN_WARNING
513 "%s: r_sample=%uus, t_elapsed=%uus\n",
514 __FUNCTION__, r_sample, t_elapsed);
515 else
516 r_sample -= t_elapsed;
502 517
503 /* Update RTT estimate by 518 /* Update RTT estimate by
504 * If (No feedback recv) 519 * If (No feedback recv)
@@ -591,8 +606,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
591 606
592static void ccid3_hc_tx_insert_options(struct sock *sk, struct sk_buff *skb) 607static void ccid3_hc_tx_insert_options(struct sock *sk, struct sk_buff *skb)
593{ 608{
594 const struct dccp_sock *dp = dccp_sk(sk); 609 struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
595 struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
596 610
597 if (hctx == NULL || !(sk->sk_state == DCCP_OPEN || 611 if (hctx == NULL || !(sk->sk_state == DCCP_OPEN ||
598 sk->sk_state == DCCP_PARTOPEN)) 612 sk->sk_state == DCCP_PARTOPEN))
@@ -606,8 +620,8 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option,
606 unsigned char *value) 620 unsigned char *value)
607{ 621{
608 int rc = 0; 622 int rc = 0;
609 struct dccp_sock *dp = dccp_sk(sk); 623 const struct dccp_sock *dp = dccp_sk(sk);
610 struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; 624 struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
611 struct ccid3_options_received *opt_recv; 625 struct ccid3_options_received *opt_recv;
612 626
613 if (hctx == NULL) 627 if (hctx == NULL)
@@ -670,11 +684,11 @@ static int ccid3_hc_tx_init(struct sock *sk)
670 684
671 ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); 685 ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
672 686
673 hctx = dp->dccps_hc_tx_ccid_private = kmalloc(sizeof(*hctx), 687 dp->dccps_hc_tx_ccid_private = kmalloc(sizeof(*hctx), gfp_any());
674 gfp_any()); 688 if (dp->dccps_hc_tx_ccid_private == NULL)
675 if (hctx == NULL)
676 return -ENOMEM; 689 return -ENOMEM;
677 690
691 hctx = ccid3_hc_tx_sk(sk);
678 memset(hctx, 0, sizeof(*hctx)); 692 memset(hctx, 0, sizeof(*hctx));
679 693
680 if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE && 694 if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE &&
@@ -696,7 +710,7 @@ static int ccid3_hc_tx_init(struct sock *sk)
696static void ccid3_hc_tx_exit(struct sock *sk) 710static void ccid3_hc_tx_exit(struct sock *sk)
697{ 711{
698 struct dccp_sock *dp = dccp_sk(sk); 712 struct dccp_sock *dp = dccp_sk(sk);
699 struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; 713 struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
700 714
701 ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); 715 ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
702 BUG_ON(hctx == NULL); 716 BUG_ON(hctx == NULL);
@@ -738,8 +752,7 @@ static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
738static inline void ccid3_hc_rx_set_state(struct sock *sk, 752static inline void ccid3_hc_rx_set_state(struct sock *sk,
739 enum ccid3_hc_rx_states state) 753 enum ccid3_hc_rx_states state)
740{ 754{
741 struct dccp_sock *dp = dccp_sk(sk); 755 struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
742 struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
743 enum ccid3_hc_rx_states oldstate = hcrx->ccid3hcrx_state; 756 enum ccid3_hc_rx_states oldstate = hcrx->ccid3hcrx_state;
744 757
745 ccid3_pr_debug("%s(%p) %-8.8s -> %s\n", 758 ccid3_pr_debug("%s(%p) %-8.8s -> %s\n",
@@ -751,14 +764,14 @@ static inline void ccid3_hc_rx_set_state(struct sock *sk,
751 764
752static void ccid3_hc_rx_send_feedback(struct sock *sk) 765static void ccid3_hc_rx_send_feedback(struct sock *sk)
753{ 766{
767 struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
754 struct dccp_sock *dp = dccp_sk(sk); 768 struct dccp_sock *dp = dccp_sk(sk);
755 struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
756 struct dccp_rx_hist_entry *packet; 769 struct dccp_rx_hist_entry *packet;
757 struct timeval now; 770 struct timeval now;
758 771
759 ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); 772 ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
760 773
761 do_gettimeofday(&now); 774 dccp_timestamp(sk, &now);
762 775
763 switch (hcrx->ccid3hcrx_state) { 776 switch (hcrx->ccid3hcrx_state) {
764 case TFRC_RSTATE_NO_DATA: 777 case TFRC_RSTATE_NO_DATA:
@@ -767,11 +780,8 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk)
767 case TFRC_RSTATE_DATA: { 780 case TFRC_RSTATE_DATA: {
768 const u32 delta = timeval_delta(&now, 781 const u32 delta = timeval_delta(&now,
769 &hcrx->ccid3hcrx_tstamp_last_feedback); 782 &hcrx->ccid3hcrx_tstamp_last_feedback);
770 783 hcrx->ccid3hcrx_x_recv = usecs_div(hcrx->ccid3hcrx_bytes_recv,
771 hcrx->ccid3hcrx_x_recv = (hcrx->ccid3hcrx_bytes_recv * 784 delta);
772 USEC_PER_SEC);
773 if (likely(delta > 1))
774 hcrx->ccid3hcrx_x_recv /= delta;
775 } 785 }
776 break; 786 break;
777 default: 787 default:
@@ -801,14 +811,14 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk)
801 hcrx->ccid3hcrx_pinv = ~0; 811 hcrx->ccid3hcrx_pinv = ~0;
802 else 812 else
803 hcrx->ccid3hcrx_pinv = 1000000 / hcrx->ccid3hcrx_p; 813 hcrx->ccid3hcrx_pinv = 1000000 / hcrx->ccid3hcrx_p;
814 dp->dccps_hc_rx_insert_options = 1;
804 dccp_send_ack(sk); 815 dccp_send_ack(sk);
805} 816}
806 817
807static void ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) 818static void ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
808{ 819{
809 const struct dccp_sock *dp = dccp_sk(sk); 820 struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
810 u32 x_recv, pinv; 821 u32 x_recv, pinv;
811 struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
812 822
813 if (hcrx == NULL || !(sk->sk_state == DCCP_OPEN || 823 if (hcrx == NULL || !(sk->sk_state == DCCP_OPEN ||
814 sk->sk_state == DCCP_PARTOPEN)) 824 sk->sk_state == DCCP_PARTOPEN))
@@ -837,8 +847,7 @@ static void ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
837 847
838static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) 848static u32 ccid3_hc_rx_calc_first_li(struct sock *sk)
839{ 849{
840 struct dccp_sock *dp = dccp_sk(sk); 850 struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
841 struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
842 struct dccp_rx_hist_entry *entry, *next, *tail = NULL; 851 struct dccp_rx_hist_entry *entry, *next, *tail = NULL;
843 u32 rtt, delta, x_recv, fval, p, tmp2; 852 u32 rtt, delta, x_recv, fval, p, tmp2;
844 struct timeval tstamp = { 0, }; 853 struct timeval tstamp = { 0, };
@@ -889,10 +898,9 @@ found:
889 if (rtt == 0) 898 if (rtt == 0)
890 rtt = 1; 899 rtt = 1;
891 900
892 delta = timeval_now_delta(&hcrx->ccid3hcrx_tstamp_last_feedback); 901 dccp_timestamp(sk, &tstamp);
893 x_recv = hcrx->ccid3hcrx_bytes_recv * USEC_PER_SEC; 902 delta = timeval_delta(&tstamp, &hcrx->ccid3hcrx_tstamp_last_feedback);
894 if (likely(delta > 1)) 903 x_recv = usecs_div(hcrx->ccid3hcrx_bytes_recv, delta);
895 x_recv /= delta;
896 904
897 tmp1 = (u64)x_recv * (u64)rtt; 905 tmp1 = (u64)x_recv * (u64)rtt;
898 do_div(tmp1,10000000); 906 do_div(tmp1,10000000);
@@ -911,8 +919,7 @@ found:
911 919
912static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) 920static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss)
913{ 921{
914 struct dccp_sock *dp = dccp_sk(sk); 922 struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
915 struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
916 923
917 if (seq_loss != DCCP_MAX_SEQNO + 1 && 924 if (seq_loss != DCCP_MAX_SEQNO + 1 &&
918 list_empty(&hcrx->ccid3hcrx_li_hist)) { 925 list_empty(&hcrx->ccid3hcrx_li_hist)) {
@@ -930,8 +937,7 @@ static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss)
930 937
931static void ccid3_hc_rx_detect_loss(struct sock *sk) 938static void ccid3_hc_rx_detect_loss(struct sock *sk)
932{ 939{
933 struct dccp_sock *dp = dccp_sk(sk); 940 struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
934 struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
935 u8 win_loss; 941 u8 win_loss;
936 const u64 seq_loss = dccp_rx_hist_detect_loss(&hcrx->ccid3hcrx_hist, 942 const u64 seq_loss = dccp_rx_hist_detect_loss(&hcrx->ccid3hcrx_hist,
937 &hcrx->ccid3hcrx_li_hist, 943 &hcrx->ccid3hcrx_li_hist,
@@ -942,13 +948,12 @@ static void ccid3_hc_rx_detect_loss(struct sock *sk)
942 948
943static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) 949static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
944{ 950{
945 struct dccp_sock *dp = dccp_sk(sk); 951 struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
946 struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
947 const struct dccp_options_received *opt_recv; 952 const struct dccp_options_received *opt_recv;
948 struct dccp_rx_hist_entry *packet; 953 struct dccp_rx_hist_entry *packet;
949 struct timeval now; 954 struct timeval now;
950 u8 win_count; 955 u8 win_count;
951 u32 p_prev; 956 u32 p_prev, r_sample, t_elapsed;
952 int ins; 957 int ins;
953 958
954 if (hcrx == NULL) 959 if (hcrx == NULL)
@@ -957,7 +962,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
957 BUG_ON(!(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA || 962 BUG_ON(!(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA ||
958 hcrx->ccid3hcrx_state == TFRC_RSTATE_DATA)); 963 hcrx->ccid3hcrx_state == TFRC_RSTATE_DATA));
959 964
960 opt_recv = &dp->dccps_options_received; 965 opt_recv = &dccp_sk(sk)->dccps_options_received;
961 966
962 switch (DCCP_SKB_CB(skb)->dccpd_type) { 967 switch (DCCP_SKB_CB(skb)->dccpd_type) {
963 case DCCP_PKT_ACK: 968 case DCCP_PKT_ACK:
@@ -967,10 +972,24 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
967 if (opt_recv->dccpor_timestamp_echo == 0) 972 if (opt_recv->dccpor_timestamp_echo == 0)
968 break; 973 break;
969 p_prev = hcrx->ccid3hcrx_rtt; 974 p_prev = hcrx->ccid3hcrx_rtt;
970 do_gettimeofday(&now); 975 dccp_timestamp(sk, &now);
971 hcrx->ccid3hcrx_rtt = timeval_usecs(&now) - 976 timeval_sub_usecs(&now, opt_recv->dccpor_timestamp_echo * 10);
972 (opt_recv->dccpor_timestamp_echo - 977 r_sample = timeval_usecs(&now);
973 opt_recv->dccpor_elapsed_time) * 10; 978 t_elapsed = opt_recv->dccpor_elapsed_time * 10;
979
980 if (unlikely(r_sample <= t_elapsed))
981 LIMIT_NETDEBUG(KERN_WARNING
982 "%s: r_sample=%uus, t_elapsed=%uus\n",
983 __FUNCTION__, r_sample, t_elapsed);
984 else
985 r_sample -= t_elapsed;
986
987 if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)
988 hcrx->ccid3hcrx_rtt = r_sample;
989 else
990 hcrx->ccid3hcrx_rtt = (hcrx->ccid3hcrx_rtt * 9) / 10 +
991 r_sample / 10;
992
974 if (p_prev != hcrx->ccid3hcrx_rtt) 993 if (p_prev != hcrx->ccid3hcrx_rtt)
975 ccid3_pr_debug("%s, New RTT=%luus, elapsed time=%u\n", 994 ccid3_pr_debug("%s, New RTT=%luus, elapsed time=%u\n",
976 dccp_role(sk), hcrx->ccid3hcrx_rtt, 995 dccp_role(sk), hcrx->ccid3hcrx_rtt,
@@ -985,7 +1004,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
985 return; 1004 return;
986 } 1005 }
987 1006
988 packet = dccp_rx_hist_entry_new(ccid3_rx_hist, opt_recv->dccpor_ndp, 1007 packet = dccp_rx_hist_entry_new(ccid3_rx_hist, sk, opt_recv->dccpor_ndp,
989 skb, SLAB_ATOMIC); 1008 skb, SLAB_ATOMIC);
990 if (packet == NULL) { 1009 if (packet == NULL) {
991 ccid3_pr_debug("%s, sk=%p, Not enough mem to add rx packet " 1010 ccid3_pr_debug("%s, sk=%p, Not enough mem to add rx packet "
@@ -1017,7 +1036,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
1017 if (ins != 0) 1036 if (ins != 0)
1018 break; 1037 break;
1019 1038
1020 do_gettimeofday(&now); 1039 dccp_timestamp(sk, &now);
1021 if (timeval_delta(&now, &hcrx->ccid3hcrx_tstamp_last_ack) >= 1040 if (timeval_delta(&now, &hcrx->ccid3hcrx_tstamp_last_ack) >=
1022 hcrx->ccid3hcrx_rtt) { 1041 hcrx->ccid3hcrx_rtt) {
1023 hcrx->ccid3hcrx_tstamp_last_ack = now; 1042 hcrx->ccid3hcrx_tstamp_last_ack = now;
@@ -1056,11 +1075,11 @@ static int ccid3_hc_rx_init(struct sock *sk)
1056 1075
1057 ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); 1076 ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
1058 1077
1059 hcrx = dp->dccps_hc_rx_ccid_private = kmalloc(sizeof(*hcrx), 1078 dp->dccps_hc_rx_ccid_private = kmalloc(sizeof(*hcrx), gfp_any());
1060 gfp_any()); 1079 if (dp->dccps_hc_rx_ccid_private == NULL)
1061 if (hcrx == NULL)
1062 return -ENOMEM; 1080 return -ENOMEM;
1063 1081
1082 hcrx = ccid3_hc_rx_sk(sk);
1064 memset(hcrx, 0, sizeof(*hcrx)); 1083 memset(hcrx, 0, sizeof(*hcrx));
1065 1084
1066 if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE && 1085 if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE &&
@@ -1072,18 +1091,16 @@ static int ccid3_hc_rx_init(struct sock *sk)
1072 hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA; 1091 hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA;
1073 INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist); 1092 INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist);
1074 INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist); 1093 INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist);
1075 /* 1094 dccp_timestamp(sk, &hcrx->ccid3hcrx_tstamp_last_ack);
1076 * XXX this seems to be paranoid, need to think more about this, for 1095 hcrx->ccid3hcrx_tstamp_last_feedback = hcrx->ccid3hcrx_tstamp_last_ack;
1077 * now start with something different than zero. -acme 1096 hcrx->ccid3hcrx_rtt = 5000; /* XXX 5ms for now... */
1078 */
1079 hcrx->ccid3hcrx_rtt = USEC_PER_SEC / 5;
1080 return 0; 1097 return 0;
1081} 1098}
1082 1099
1083static void ccid3_hc_rx_exit(struct sock *sk) 1100static void ccid3_hc_rx_exit(struct sock *sk)
1084{ 1101{
1102 struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
1085 struct dccp_sock *dp = dccp_sk(sk); 1103 struct dccp_sock *dp = dccp_sk(sk);
1086 struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
1087 1104
1088 ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); 1105 ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
1089 1106
@@ -1104,8 +1121,7 @@ static void ccid3_hc_rx_exit(struct sock *sk)
1104 1121
1105static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) 1122static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
1106{ 1123{
1107 const struct dccp_sock *dp = dccp_sk(sk); 1124 const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
1108 const struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
1109 1125
1110 if (hcrx == NULL) 1126 if (hcrx == NULL)
1111 return; 1127 return;
@@ -1117,8 +1133,7 @@ static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
1117 1133
1118static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) 1134static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
1119{ 1135{
1120 const struct dccp_sock *dp = dccp_sk(sk); 1136 const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
1121 const struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
1122 1137
1123 if (hctx == NULL) 1138 if (hctx == NULL)
1124 return; 1139 return;
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index ee8cbace663..d16f00d784f 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -115,7 +115,7 @@ struct ccid3_hc_rx_sock {
115 u64 ccid3hcrx_seqno_last_counter:48, 115 u64 ccid3hcrx_seqno_last_counter:48,
116 ccid3hcrx_state:8, 116 ccid3hcrx_state:8,
117 ccid3hcrx_last_counter:4; 117 ccid3hcrx_last_counter:4;
118 unsigned long ccid3hcrx_rtt; 118 u32 ccid3hcrx_rtt;
119 u32 ccid3hcrx_p; 119 u32 ccid3hcrx_p;
120 u32 ccid3hcrx_bytes_recv; 120 u32 ccid3hcrx_bytes_recv;
121 struct timeval ccid3hcrx_tstamp_last_feedback; 121 struct timeval ccid3hcrx_tstamp_last_feedback;
@@ -128,10 +128,14 @@ struct ccid3_hc_rx_sock {
128 u32 ccid3hcrx_x_recv; 128 u32 ccid3hcrx_x_recv;
129}; 129};
130 130
131#define ccid3_hc_tx_field(s,field) (s->dccps_hc_tx_ccid_private == NULL ? 0 : \ 131static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk)
132 ((struct ccid3_hc_tx_sock *)s->dccps_hc_tx_ccid_private)->ccid3hctx_##field) 132{
133 return dccp_sk(sk)->dccps_hc_tx_ccid_private;
134}
133 135
134#define ccid3_hc_rx_field(s,field) (s->dccps_hc_rx_ccid_private == NULL ? 0 : \ 136static inline struct ccid3_hc_rx_sock *ccid3_hc_rx_sk(const struct sock *sk)
135 ((struct ccid3_hc_rx_sock *)s->dccps_hc_rx_ccid_private)->ccid3hcrx_##field) 137{
138 return dccp_sk(sk)->dccps_hc_rx_ccid_private;
139}
136 140
137#endif /* _DCCP_CCID3_H_ */ 141#endif /* _DCCP_CCID3_H_ */
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index fb90a91aa93..b375ebdb7dc 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -134,6 +134,7 @@ static inline struct dccp_tx_hist_entry *
134 134
135static inline struct dccp_rx_hist_entry * 135static inline struct dccp_rx_hist_entry *
136 dccp_rx_hist_entry_new(struct dccp_rx_hist *hist, 136 dccp_rx_hist_entry_new(struct dccp_rx_hist *hist,
137 const struct sock *sk,
137 const u32 ndp, 138 const u32 ndp,
138 const struct sk_buff *skb, 139 const struct sk_buff *skb,
139 const unsigned int __nocast prio) 140 const unsigned int __nocast prio)
@@ -148,7 +149,7 @@ static inline struct dccp_rx_hist_entry *
148 entry->dccphrx_ccval = dh->dccph_ccval; 149 entry->dccphrx_ccval = dh->dccph_ccval;
149 entry->dccphrx_type = dh->dccph_type; 150 entry->dccphrx_type = dh->dccph_type;
150 entry->dccphrx_ndp = ndp; 151 entry->dccphrx_ndp = ndp;
151 do_gettimeofday(&(entry->dccphrx_tstamp)); 152 dccp_timestamp(sk, &entry->dccphrx_tstamp);
152 } 153 }
153 154
154 return entry; 155 return entry;
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 33456c0d593..95c4630b3b1 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -426,10 +426,13 @@ extern struct dccp_ackpkts *
426 dccp_ackpkts_alloc(unsigned int len, 426 dccp_ackpkts_alloc(unsigned int len,
427 const unsigned int __nocast priority); 427 const unsigned int __nocast priority);
428extern void dccp_ackpkts_free(struct dccp_ackpkts *ap); 428extern void dccp_ackpkts_free(struct dccp_ackpkts *ap);
429extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state); 429extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, const struct sock *sk,
430 u64 ackno, u8 state);
430extern void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, 431extern void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap,
431 struct sock *sk, u64 ackno); 432 struct sock *sk, u64 ackno);
432 433
434extern void dccp_timestamp(const struct sock *sk, struct timeval *tv);
435
433static inline suseconds_t timeval_usecs(const struct timeval *tv) 436static inline suseconds_t timeval_usecs(const struct timeval *tv)
434{ 437{
435 return tv->tv_sec * USEC_PER_SEC + tv->tv_usec; 438 return tv->tv_sec * USEC_PER_SEC + tv->tv_usec;
@@ -468,17 +471,6 @@ static inline void timeval_sub_usecs(struct timeval *tv,
468 } 471 }
469} 472}
470 473
471/*
472 * Returns the difference in usecs between timeval
473 * passed in and current time
474 */
475static inline suseconds_t timeval_now_delta(const struct timeval *tv)
476{
477 struct timeval now;
478 do_gettimeofday(&now);
479 return timeval_delta(&now, tv);
480}
481
482#ifdef CONFIG_IP_DCCP_DEBUG 474#ifdef CONFIG_IP_DCCP_DEBUG
483extern void dccp_ackvector_print(const u64 ackno, 475extern void dccp_ackvector_print(const u64 ackno,
484 const unsigned char *vector, int len); 476 const unsigned char *vector, int len);
diff --git a/net/dccp/input.c b/net/dccp/input.c
index ef29cef1daf..c60bc3433f5 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -170,7 +170,7 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
170 if (dp->dccps_options.dccpo_send_ack_vector) { 170 if (dp->dccps_options.dccpo_send_ack_vector) {
171 struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; 171 struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;
172 172
173 if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, 173 if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, sk,
174 DCCP_SKB_CB(skb)->dccpd_seq, 174 DCCP_SKB_CB(skb)->dccpd_seq,
175 DCCP_ACKPKTS_STATE_RECEIVED)) { 175 DCCP_ACKPKTS_STATE_RECEIVED)) {
176 LIMIT_NETDEBUG(KERN_WARNING "DCCP: acknowledgeable " 176 LIMIT_NETDEBUG(KERN_WARNING "DCCP: acknowledgeable "
@@ -498,7 +498,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
498 * DCCP_ACKPKTS_STATE_ECN_MARKED 498 * DCCP_ACKPKTS_STATE_ECN_MARKED
499 */ 499 */
500 if (dp->dccps_options.dccpo_send_ack_vector) { 500 if (dp->dccps_options.dccpo_send_ack_vector) {
501 if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, 501 if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, sk,
502 DCCP_SKB_CB(skb)->dccpd_seq, 502 DCCP_SKB_CB(skb)->dccpd_seq,
503 DCCP_ACKPKTS_STATE_RECEIVED)) 503 DCCP_ACKPKTS_STATE_RECEIVED))
504 goto discard; 504 goto discard;
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 3fc75dbee4b..fee9a8c3777 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -1243,6 +1243,7 @@ static int dccp_v4_init_sock(struct sock *sk)
1243 static int dccp_ctl_socket_init = 1; 1243 static int dccp_ctl_socket_init = 1;
1244 1244
1245 dccp_options_init(&dp->dccps_options); 1245 dccp_options_init(&dp->dccps_options);
1246 do_gettimeofday(&dp->dccps_epoch);
1246 1247
1247 if (dp->dccps_options.dccpo_send_ack_vector) { 1248 if (dp->dccps_options.dccpo_send_ack_vector) {
1248 dp->dccps_hc_rx_ackpkts = 1249 dp->dccps_hc_rx_ackpkts =
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index ce5dff4ac22..18461bc04cb 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -96,6 +96,7 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
96 newdp->dccps_hc_rx_ackpkts = NULL; 96 newdp->dccps_hc_rx_ackpkts = NULL;
97 newdp->dccps_role = DCCP_ROLE_SERVER; 97 newdp->dccps_role = DCCP_ROLE_SERVER;
98 newicsk->icsk_rto = DCCP_TIMEOUT_INIT; 98 newicsk->icsk_rto = DCCP_TIMEOUT_INIT;
99 do_gettimeofday(&newdp->dccps_epoch);
99 100
100 if (newdp->dccps_options.dccpo_send_ack_vector) { 101 if (newdp->dccps_options.dccpo_send_ack_vector) {
101 newdp->dccps_hc_rx_ackpkts = 102 newdp->dccps_hc_rx_ackpkts =
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 382c5894acb..d4c4242d8dd 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -72,6 +72,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
72 struct dccp_options_received *opt_recv = &dp->dccps_options_received; 72 struct dccp_options_received *opt_recv = &dp->dccps_options_received;
73 unsigned char opt, len; 73 unsigned char opt, len;
74 unsigned char *value; 74 unsigned char *value;
75 u32 elapsed_time;
75 76
76 memset(opt_recv, 0, sizeof(*opt_recv)); 77 memset(opt_recv, 0, sizeof(*opt_recv));
77 78
@@ -139,7 +140,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
139 opt_recv->dccpor_timestamp = ntohl(*(u32 *)value); 140 opt_recv->dccpor_timestamp = ntohl(*(u32 *)value);
140 141
141 dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp; 142 dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp;
142 do_gettimeofday(&dp->dccps_timestamp_time); 143 dccp_timestamp(sk, &dp->dccps_timestamp_time);
143 144
144 dccp_pr_debug("%sTIMESTAMP=%u, ackno=%llu\n", 145 dccp_pr_debug("%sTIMESTAMP=%u, ackno=%llu\n",
145 debug_prefix, opt_recv->dccpor_timestamp, 146 debug_prefix, opt_recv->dccpor_timestamp,
@@ -159,18 +160,18 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
159 (unsigned long long) 160 (unsigned long long)
160 DCCP_SKB_CB(skb)->dccpd_ack_seq); 161 DCCP_SKB_CB(skb)->dccpd_ack_seq);
161 162
162 if (len > 4) {
163 if (len == 6)
164 opt_recv->dccpor_elapsed_time =
165 ntohs(*(u16 *)(value + 4));
166 else
167 opt_recv->dccpor_elapsed_time =
168 ntohl(*(u32 *)(value + 4));
169 163
170 dccp_pr_debug("%sTIMESTAMP_ECHO ELAPSED_TIME=%d\n", 164 if (len == 4)
171 debug_prefix, 165 break;
172 opt_recv->dccpor_elapsed_time); 166
173 } 167 if (len == 6)
168 elapsed_time = ntohs(*(u16 *)(value + 4));
169 else
170 elapsed_time = ntohl(*(u32 *)(value + 4));
171
172 /* Give precedence to the biggest ELAPSED_TIME */
173 if (elapsed_time > opt_recv->dccpor_elapsed_time)
174 opt_recv->dccpor_elapsed_time = elapsed_time;
174 break; 175 break;
175 case DCCPO_ELAPSED_TIME: 176 case DCCPO_ELAPSED_TIME:
176 if (len != 2 && len != 4) 177 if (len != 2 && len != 4)
@@ -180,14 +181,15 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
180 continue; 181 continue;
181 182
182 if (len == 2) 183 if (len == 2)
183 opt_recv->dccpor_elapsed_time = 184 elapsed_time = ntohs(*(u16 *)value);
184 ntohs(*(u16 *)value);
185 else 185 else
186 opt_recv->dccpor_elapsed_time = 186 elapsed_time = ntohl(*(u32 *)value);
187 ntohl(*(u32 *)value); 187
188 if (elapsed_time > opt_recv->dccpor_elapsed_time)
189 opt_recv->dccpor_elapsed_time = elapsed_time;
188 190
189 dccp_pr_debug("%sELAPSED_TIME=%d\n", debug_prefix, 191 dccp_pr_debug("%sELAPSED_TIME=%d\n", debug_prefix,
190 opt_recv->dccpor_elapsed_time); 192 elapsed_time);
191 break; 193 break;
192 /* 194 /*
193 * From draft-ietf-dccp-spec-11.txt: 195 * From draft-ietf-dccp-spec-11.txt:
@@ -359,9 +361,13 @@ static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb)
359#endif 361#endif
360 struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; 362 struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;
361 int len = ap->dccpap_buf_vector_len + 2; 363 int len = ap->dccpap_buf_vector_len + 2;
362 const u32 elapsed_time = timeval_now_delta(&ap->dccpap_time) / 10; 364 struct timeval now;
365 u32 elapsed_time;
363 unsigned char *to, *from; 366 unsigned char *to, *from;
364 367
368 dccp_timestamp(sk, &now);
369 elapsed_time = timeval_delta(&now, &ap->dccpap_time) / 10;
370
365 if (elapsed_time != 0) 371 if (elapsed_time != 0)
366 dccp_insert_option_elapsed_time(sk, skb, elapsed_time); 372 dccp_insert_option_elapsed_time(sk, skb, elapsed_time);
367 373
@@ -426,13 +432,29 @@ static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb)
426 (unsigned long long) ap->dccpap_ack_ackno); 432 (unsigned long long) ap->dccpap_ack_ackno);
427} 433}
428 434
435void dccp_timestamp(const struct sock *sk, struct timeval *tv)
436{
437 const struct dccp_sock *dp = dccp_sk(sk);
438
439 do_gettimeofday(tv);
440 tv->tv_sec -= dp->dccps_epoch.tv_sec;
441 tv->tv_usec -= dp->dccps_epoch.tv_usec;
442
443 while (tv->tv_usec < 0) {
444 tv->tv_sec--;
445 tv->tv_usec += USEC_PER_SEC;
446 }
447}
448
449EXPORT_SYMBOL_GPL(dccp_timestamp);
450
429void dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb) 451void dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb)
430{ 452{
431 struct timeval tv; 453 struct timeval tv;
432 u32 now; 454 u32 now;
433 455
434 do_gettimeofday(&tv); 456 dccp_timestamp(sk, &tv);
435 now = (tv.tv_sec * USEC_PER_SEC + tv.tv_usec) / 10; 457 now = timeval_usecs(&tv) / 10;
436 /* yes this will overflow but that is the point as we want a 458 /* yes this will overflow but that is the point as we want a
437 * 10 usec 32 bit timer which mean it wraps every 11.9 hours */ 459 * 10 usec 32 bit timer which mean it wraps every 11.9 hours */
438 460
@@ -450,13 +472,17 @@ static void dccp_insert_option_timestamp_echo(struct sock *sk,
450 const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? 472 const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
451 "CLIENT TX opt: " : "server TX opt: "; 473 "CLIENT TX opt: " : "server TX opt: ";
452#endif 474#endif
475 struct timeval now;
453 u32 tstamp_echo; 476 u32 tstamp_echo;
454 const u32 elapsed_time = 477 u32 elapsed_time;
455 timeval_now_delta(&dp->dccps_timestamp_time) / 10; 478 int len, elapsed_time_len;
456 const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time);
457 const int len = 6 + elapsed_time_len;
458 unsigned char *to; 479 unsigned char *to;
459 480
481 dccp_timestamp(sk, &now);
482 elapsed_time = timeval_delta(&now, &dp->dccps_timestamp_time) / 10;
483 elapsed_time_len = dccp_elapsed_time_len(elapsed_time);
484 len = 6 + elapsed_time_len;
485
460 if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { 486 if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
461 LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert " 487 LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert "
462 "timestamp echo!\n"); 488 "timestamp echo!\n");
@@ -505,13 +531,18 @@ void dccp_insert_options(struct sock *sk, struct sk_buff *skb)
505 (dp->dccps_hc_rx_ackpkts->dccpap_buf_ackno != 531 (dp->dccps_hc_rx_ackpkts->dccpap_buf_ackno !=
506 DCCP_MAX_SEQNO + 1)) 532 DCCP_MAX_SEQNO + 1))
507 dccp_insert_option_ack_vector(sk, skb); 533 dccp_insert_option_ack_vector(sk, skb);
508
509 if (dp->dccps_timestamp_echo != 0) 534 if (dp->dccps_timestamp_echo != 0)
510 dccp_insert_option_timestamp_echo(sk, skb); 535 dccp_insert_option_timestamp_echo(sk, skb);
511 } 536 }
512 537
513 ccid_hc_rx_insert_options(dp->dccps_hc_rx_ccid, sk, skb); 538 if (dp->dccps_hc_rx_insert_options) {
514 ccid_hc_tx_insert_options(dp->dccps_hc_tx_ccid, sk, skb); 539 ccid_hc_rx_insert_options(dp->dccps_hc_rx_ccid, sk, skb);
540 dp->dccps_hc_rx_insert_options = 0;
541 }
542 if (dp->dccps_hc_tx_insert_options) {
543 ccid_hc_tx_insert_options(dp->dccps_hc_tx_ccid, sk, skb);
544 dp->dccps_hc_tx_insert_options = 0;
545 }
515 546
516 /* XXX: insert other options when appropriate */ 547 /* XXX: insert other options when appropriate */
517 548
@@ -616,7 +647,8 @@ static inline int dccp_ackpkts_set_buf_head_state(struct dccp_ackpkts *ap,
616/* 647/*
617 * Implements the draft-ietf-dccp-spec-11.txt Appendix A 648 * Implements the draft-ietf-dccp-spec-11.txt Appendix A
618 */ 649 */
619int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state) 650int dccp_ackpkts_add(struct dccp_ackpkts *ap, const struct sock *sk,
651 u64 ackno, u8 state)
620{ 652{
621 /* 653 /*
622 * Check at the right places if the buffer is full, if it is, tell the 654 * Check at the right places if the buffer is full, if it is, tell the
@@ -697,7 +729,7 @@ int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state)
697 } 729 }
698 730
699 ap->dccpap_buf_ackno = ackno; 731 ap->dccpap_buf_ackno = ackno;
700 do_gettimeofday(&ap->dccpap_time); 732 dccp_timestamp(sk, &ap->dccpap_time);
701out: 733out:
702 dccp_pr_debug(""); 734 dccp_pr_debug("");
703 dccp_ackpkts_print(ap); 735 dccp_ackpkts_print(ap);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index bf147f8db39..a9d84f93442 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1248,11 +1248,6 @@ module_init(inet_init);
1248/* ------------------------------------------------------------------------ */ 1248/* ------------------------------------------------------------------------ */
1249 1249
1250#ifdef CONFIG_PROC_FS 1250#ifdef CONFIG_PROC_FS
1251#ifdef CONFIG_IP_FIB_TRIE
1252extern int fib_stat_proc_init(void);
1253extern void fib_stat_proc_exit(void);
1254#endif
1255
1256static int __init ipv4_proc_init(void) 1251static int __init ipv4_proc_init(void)
1257{ 1252{
1258 int rc = 0; 1253 int rc = 0;
@@ -1265,19 +1260,11 @@ static int __init ipv4_proc_init(void)
1265 goto out_udp; 1260 goto out_udp;
1266 if (fib_proc_init()) 1261 if (fib_proc_init())
1267 goto out_fib; 1262 goto out_fib;
1268#ifdef CONFIG_IP_FIB_TRIE
1269 if (fib_stat_proc_init())
1270 goto out_fib_stat;
1271#endif
1272 if (ip_misc_proc_init()) 1263 if (ip_misc_proc_init())
1273 goto out_misc; 1264 goto out_misc;
1274out: 1265out:
1275 return rc; 1266 return rc;
1276out_misc: 1267out_misc:
1277#ifdef CONFIG_IP_FIB_TRIE
1278 fib_stat_proc_exit();
1279out_fib_stat:
1280#endif
1281 fib_proc_exit(); 1268 fib_proc_exit();
1282out_fib: 1269out_fib:
1283 udp4_proc_exit(); 1270 udp4_proc_exit();
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index b2dea4e5da7..1b63b482416 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -43,7 +43,7 @@
43 * 2 of the License, or (at your option) any later version. 43 * 2 of the License, or (at your option) any later version.
44 */ 44 */
45 45
46#define VERSION "0.402" 46#define VERSION "0.403"
47 47
48#include <linux/config.h> 48#include <linux/config.h>
49#include <asm/uaccess.h> 49#include <asm/uaccess.h>
@@ -164,7 +164,6 @@ static struct node *resize(struct trie *t, struct tnode *tn);
164static struct tnode *inflate(struct trie *t, struct tnode *tn); 164static struct tnode *inflate(struct trie *t, struct tnode *tn);
165static struct tnode *halve(struct trie *t, struct tnode *tn); 165static struct tnode *halve(struct trie *t, struct tnode *tn);
166static void tnode_free(struct tnode *tn); 166static void tnode_free(struct tnode *tn);
167static void trie_dump_seq(struct seq_file *seq, struct trie *t);
168 167
169static kmem_cache_t *fn_alias_kmem __read_mostly; 168static kmem_cache_t *fn_alias_kmem __read_mostly;
170static struct trie *trie_local = NULL, *trie_main = NULL; 169static struct trie *trie_local = NULL, *trie_main = NULL;
@@ -1971,558 +1970,525 @@ struct fib_table * __init fib_hash_init(int id)
1971 return tb; 1970 return tb;
1972} 1971}
1973 1972
1974/* Trie dump functions */ 1973#ifdef CONFIG_PROC_FS
1974/* Depth first Trie walk iterator */
1975struct fib_trie_iter {
1976 struct tnode *tnode;
1977 struct trie *trie;
1978 unsigned index;
1979 unsigned depth;
1980};
1975 1981
1976static void putspace_seq(struct seq_file *seq, int n) 1982static struct node *fib_trie_get_next(struct fib_trie_iter *iter)
1977{ 1983{
1978 while (n--) 1984 struct tnode *tn = iter->tnode;
1979 seq_printf(seq, " "); 1985 unsigned cindex = iter->index;
1980} 1986 struct tnode *p;
1981 1987
1982static void printbin_seq(struct seq_file *seq, unsigned int v, int bits) 1988 pr_debug("get_next iter={node=%p index=%d depth=%d}\n",
1983{ 1989 iter->tnode, iter->index, iter->depth);
1984 while (bits--) 1990rescan:
1985 seq_printf(seq, "%s", (v & (1<<bits))?"1":"0"); 1991 while (cindex < (1<<tn->bits)) {
1986} 1992 struct node *n = tnode_get_child(tn, cindex);
1987 1993
1988static void printnode_seq(struct seq_file *seq, int indent, struct node *n, 1994 if (n) {
1989 int pend, int cindex, int bits) 1995 if (IS_LEAF(n)) {
1990{ 1996 iter->tnode = tn;
1991 putspace_seq(seq, indent); 1997 iter->index = cindex + 1;
1992 if (IS_LEAF(n)) 1998 } else {
1993 seq_printf(seq, "|"); 1999 /* push down one level */
1994 else 2000 iter->tnode = (struct tnode *) n;
1995 seq_printf(seq, "+"); 2001 iter->index = 0;
1996 if (bits) { 2002 ++iter->depth;
1997 seq_printf(seq, "%d/", cindex); 2003 }
1998 printbin_seq(seq, cindex, bits); 2004 return n;
1999 seq_printf(seq, ": "); 2005 }
2000 } else
2001 seq_printf(seq, "<root>: ");
2002 seq_printf(seq, "%s:%p ", IS_LEAF(n)?"Leaf":"Internal node", n);
2003 2006
2004 if (IS_LEAF(n)) { 2007 ++cindex;
2005 struct leaf *l = (struct leaf *)n; 2008 }
2006 struct fib_alias *fa;
2007 int i;
2008 2009
2009 seq_printf(seq, "key=%d.%d.%d.%d\n", 2010 /* Current node exhausted, pop back up */
2010 n->key >> 24, (n->key >> 16) % 256, (n->key >> 8) % 256, n->key % 256); 2011 p = NODE_PARENT(tn);
2011 2012 if (p) {
2012 for (i = 32; i >= 0; i--) 2013 cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1;
2013 if (find_leaf_info(&l->list, i)) { 2014 tn = p;
2014 struct list_head *fa_head = get_fa_head(l, i); 2015 --iter->depth;
2015 2016 goto rescan;
2016 if (!fa_head)
2017 continue;
2018
2019 if (list_empty(fa_head))
2020 continue;
2021
2022 putspace_seq(seq, indent+2);
2023 seq_printf(seq, "{/%d...dumping}\n", i);
2024
2025 list_for_each_entry_rcu(fa, fa_head, fa_list) {
2026 putspace_seq(seq, indent+2);
2027 if (fa->fa_info == NULL) {
2028 seq_printf(seq, "Error fa_info=NULL\n");
2029 continue;
2030 }
2031 if (fa->fa_info->fib_nh == NULL) {
2032 seq_printf(seq, "Error _fib_nh=NULL\n");
2033 continue;
2034 }
2035
2036 seq_printf(seq, "{type=%d scope=%d TOS=%d}\n",
2037 fa->fa_type,
2038 fa->fa_scope,
2039 fa->fa_tos);
2040 }
2041 }
2042 } else {
2043 struct tnode *tn = (struct tnode *)n;
2044 int plen = ((struct tnode *)n)->pos;
2045 t_key prf = MASK_PFX(n->key, plen);
2046
2047 seq_printf(seq, "key=%d.%d.%d.%d/%d\n",
2048 prf >> 24, (prf >> 16) % 256, (prf >> 8) % 256, prf % 256, plen);
2049
2050 putspace_seq(seq, indent); seq_printf(seq, "| ");
2051 seq_printf(seq, "{key prefix=%08x/", tn->key & TKEY_GET_MASK(0, tn->pos));
2052 printbin_seq(seq, tkey_extract_bits(tn->key, 0, tn->pos), tn->pos);
2053 seq_printf(seq, "}\n");
2054 putspace_seq(seq, indent); seq_printf(seq, "| ");
2055 seq_printf(seq, "{pos=%d", tn->pos);
2056 seq_printf(seq, " (skip=%d bits)", tn->pos - pend);
2057 seq_printf(seq, " bits=%d (%u children)}\n", tn->bits, (1 << tn->bits));
2058 putspace_seq(seq, indent); seq_printf(seq, "| ");
2059 seq_printf(seq, "{empty=%d full=%d}\n", tn->empty_children, tn->full_children);
2060 } 2017 }
2018
2019 /* got root? */
2020 return NULL;
2061} 2021}
2062 2022
2063static void trie_dump_seq(struct seq_file *seq, struct trie *t) 2023static struct node *fib_trie_get_first(struct fib_trie_iter *iter,
2024 struct trie *t)
2064{ 2025{
2065 struct node *n; 2026 struct node *n = rcu_dereference(t->trie);
2066 int cindex = 0;
2067 int indent = 1;
2068 int pend = 0;
2069 int depth = 0;
2070 struct tnode *tn;
2071
2072 rcu_read_lock();
2073 n = rcu_dereference(t->trie);
2074 seq_printf(seq, "------ trie_dump of t=%p ------\n", t);
2075 2027
2076 if (!n) { 2028 if (n && IS_TNODE(n)) {
2077 seq_printf(seq, "------ trie is empty\n"); 2029 iter->tnode = (struct tnode *) n;
2078 2030 iter->trie = t;
2079 rcu_read_unlock(); 2031 iter->index = 0;
2080 return; 2032 iter->depth = 0;
2033 return n;
2081 } 2034 }
2035 return NULL;
2036}
2082 2037
2083 printnode_seq(seq, indent, n, pend, cindex, 0); 2038static void trie_collect_stats(struct trie *t, struct trie_stat *s)
2084 2039{
2085 if (!IS_TNODE(n)) { 2040 struct node *n;
2086 rcu_read_unlock(); 2041 struct fib_trie_iter iter;
2087 return;
2088 }
2089
2090 tn = (struct tnode *)n;
2091 pend = tn->pos+tn->bits;
2092 putspace_seq(seq, indent); seq_printf(seq, "\\--\n");
2093 indent += 3;
2094 depth++;
2095
2096 while (tn && cindex < (1 << tn->bits)) {
2097 struct node *child = rcu_dereference(tn->child[cindex]);
2098 if (!child)
2099 cindex++;
2100 else {
2101 /* Got a child */
2102 printnode_seq(seq, indent, child, pend,
2103 cindex, tn->bits);
2104
2105 if (IS_LEAF(child))
2106 cindex++;
2107
2108 else {
2109 /*
2110 * New tnode. Decend one level
2111 */
2112
2113 depth++;
2114 n = child;
2115 tn = (struct tnode *)n;
2116 pend = tn->pos+tn->bits;
2117 putspace_seq(seq, indent);
2118 seq_printf(seq, "\\--\n");
2119 indent += 3;
2120 cindex = 0;
2121 }
2122 }
2123
2124 /*
2125 * Test if we are done
2126 */
2127
2128 while (cindex >= (1 << tn->bits)) {
2129 /*
2130 * Move upwards and test for root
2131 * pop off all traversed nodes
2132 */
2133 2042
2134 if (NODE_PARENT(tn) == NULL) { 2043 memset(s, 0, sizeof(*s));
2135 tn = NULL;
2136 break;
2137 }
2138 2044
2139 cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits); 2045 rcu_read_lock();
2140 cindex++; 2046 for (n = fib_trie_get_first(&iter, t); n;
2141 tn = NODE_PARENT(tn); 2047 n = fib_trie_get_next(&iter)) {
2142 pend = tn->pos + tn->bits; 2048 if (IS_LEAF(n)) {
2143 indent -= 3; 2049 s->leaves++;
2144 depth--; 2050 s->totdepth += iter.depth;
2051 if (iter.depth > s->maxdepth)
2052 s->maxdepth = iter.depth;
2053 } else {
2054 const struct tnode *tn = (const struct tnode *) n;
2055 int i;
2056
2057 s->tnodes++;
2058 s->nodesizes[tn->bits]++;
2059 for (i = 0; i < (1<<tn->bits); i++)
2060 if (!tn->child[i])
2061 s->nullpointers++;
2145 } 2062 }
2146 } 2063 }
2147 rcu_read_unlock(); 2064 rcu_read_unlock();
2148} 2065}
2149 2066
2150static struct trie_stat *trie_stat_new(void) 2067/*
2068 * This outputs /proc/net/fib_triestats
2069 */
2070static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
2151{ 2071{
2152 struct trie_stat *s; 2072 unsigned i, max, pointers, bytes, avdepth;
2153 int i;
2154 2073
2155 s = kmalloc(sizeof(struct trie_stat), GFP_KERNEL); 2074 if (stat->leaves)
2156 if (!s) 2075 avdepth = stat->totdepth*100 / stat->leaves;
2157 return NULL; 2076 else
2077 avdepth = 0;
2158 2078
2159 s->totdepth = 0; 2079 seq_printf(seq, "\tAver depth: %d.%02d\n", avdepth / 100, avdepth % 100 );
2160 s->maxdepth = 0; 2080 seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth);
2161 s->tnodes = 0;
2162 s->leaves = 0;
2163 s->nullpointers = 0;
2164 2081
2165 for (i = 0; i < MAX_CHILDS; i++) 2082 seq_printf(seq, "\tLeaves: %u\n", stat->leaves);
2166 s->nodesizes[i] = 0;
2167 2083
2168 return s; 2084 bytes = sizeof(struct leaf) * stat->leaves;
2169} 2085 seq_printf(seq, "\tInternal nodes: %d\n\t", stat->tnodes);
2086 bytes += sizeof(struct tnode) * stat->tnodes;
2170 2087
2171static struct trie_stat *trie_collect_stats(struct trie *t) 2088 max = MAX_CHILDS-1;
2172{ 2089 while (max >= 0 && stat->nodesizes[max] == 0)
2173 struct node *n; 2090 max--;
2174 struct trie_stat *s = trie_stat_new();
2175 int cindex = 0;
2176 int pend = 0;
2177 int depth = 0;
2178 2091
2179 if (!s) 2092 pointers = 0;
2180 return NULL; 2093 for (i = 1; i <= max; i++)
2094 if (stat->nodesizes[i] != 0) {
2095 seq_printf(seq, " %d: %d", i, stat->nodesizes[i]);
2096 pointers += (1<<i) * stat->nodesizes[i];
2097 }
2098 seq_putc(seq, '\n');
2099 seq_printf(seq, "\tPointers: %d\n", pointers);
2181 2100
2182 rcu_read_lock(); 2101 bytes += sizeof(struct node *) * pointers;
2183 n = rcu_dereference(t->trie); 2102 seq_printf(seq, "Null ptrs: %d\n", stat->nullpointers);
2103 seq_printf(seq, "Total size: %d kB\n", (bytes + 1023) / 1024);
2184 2104
2185 if (!n) 2105#ifdef CONFIG_IP_FIB_TRIE_STATS
2186 return s; 2106 seq_printf(seq, "Counters:\n---------\n");
2107 seq_printf(seq,"gets = %d\n", t->stats.gets);
2108 seq_printf(seq,"backtracks = %d\n", t->stats.backtrack);
2109 seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed);
2110 seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss);
2111 seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit);
2112 seq_printf(seq,"skipped node resize = %d\n", t->stats.resize_node_skipped);
2113#ifdef CLEAR_STATS
2114 memset(&(t->stats), 0, sizeof(t->stats));
2115#endif
2116#endif /* CONFIG_IP_FIB_TRIE_STATS */
2117}
2187 2118
2188 if (IS_TNODE(n)) { 2119static int fib_triestat_seq_show(struct seq_file *seq, void *v)
2189 struct tnode *tn = (struct tnode *)n; 2120{
2190 pend = tn->pos+tn->bits; 2121 struct trie_stat *stat;
2191 s->nodesizes[tn->bits]++;
2192 depth++;
2193
2194 while (tn && cindex < (1 << tn->bits)) {
2195 struct node *ch = rcu_dereference(tn->child[cindex]);
2196 if (ch) {
2197
2198 /* Got a child */
2199
2200 if (IS_LEAF(tn->child[cindex])) {
2201 cindex++;
2202
2203 /* stats */
2204 if (depth > s->maxdepth)
2205 s->maxdepth = depth;
2206 s->totdepth += depth;
2207 s->leaves++;
2208 } else {
2209 /*
2210 * New tnode. Decend one level
2211 */
2212
2213 s->tnodes++;
2214 s->nodesizes[tn->bits]++;
2215 depth++;
2216
2217 n = ch;
2218 tn = (struct tnode *)n;
2219 pend = tn->pos+tn->bits;
2220
2221 cindex = 0;
2222 }
2223 } else {
2224 cindex++;
2225 s->nullpointers++;
2226 }
2227 2122
2228 /* 2123 stat = kmalloc(sizeof(*stat), GFP_KERNEL);
2229 * Test if we are done 2124 if (!stat)
2230 */ 2125 return -ENOMEM;
2231 2126
2232 while (cindex >= (1 << tn->bits)) { 2127 seq_printf(seq, "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n",
2233 /* 2128 sizeof(struct leaf), sizeof(struct tnode));
2234 * Move upwards and test for root
2235 * pop off all traversed nodes
2236 */
2237 2129
2238 if (NODE_PARENT(tn) == NULL) { 2130 if (trie_local) {
2239 tn = NULL; 2131 seq_printf(seq, "Local:\n");
2240 n = NULL; 2132 trie_collect_stats(trie_local, stat);
2241 break; 2133 trie_show_stats(seq, stat);
2242 } 2134 }
2243 2135
2244 cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits); 2136 if (trie_main) {
2245 tn = NODE_PARENT(tn); 2137 seq_printf(seq, "Main:\n");
2246 cindex++; 2138 trie_collect_stats(trie_main, stat);
2247 n = (struct node *)tn; 2139 trie_show_stats(seq, stat);
2248 pend = tn->pos+tn->bits;
2249 depth--;
2250 }
2251 }
2252 } 2140 }
2141 kfree(stat);
2253 2142
2254 rcu_read_unlock(); 2143 return 0;
2255 return s;
2256} 2144}
2257 2145
2258#ifdef CONFIG_PROC_FS 2146static int fib_triestat_seq_open(struct inode *inode, struct file *file)
2259
2260static struct fib_alias *fib_triestat_get_first(struct seq_file *seq)
2261{ 2147{
2262 return NULL; 2148 return single_open(file, fib_triestat_seq_show, NULL);
2263} 2149}
2264 2150
2265static struct fib_alias *fib_triestat_get_next(struct seq_file *seq) 2151static struct file_operations fib_triestat_fops = {
2152 .owner = THIS_MODULE,
2153 .open = fib_triestat_seq_open,
2154 .read = seq_read,
2155 .llseek = seq_lseek,
2156 .release = single_release,
2157};
2158
2159static struct node *fib_trie_get_idx(struct fib_trie_iter *iter,
2160 loff_t pos)
2266{ 2161{
2162 loff_t idx = 0;
2163 struct node *n;
2164
2165 for (n = fib_trie_get_first(iter, trie_local);
2166 n; ++idx, n = fib_trie_get_next(iter)) {
2167 if (pos == idx)
2168 return n;
2169 }
2170
2171 for (n = fib_trie_get_first(iter, trie_main);
2172 n; ++idx, n = fib_trie_get_next(iter)) {
2173 if (pos == idx)
2174 return n;
2175 }
2267 return NULL; 2176 return NULL;
2268} 2177}
2269 2178
2270static void *fib_triestat_seq_start(struct seq_file *seq, loff_t *pos) 2179static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
2271{ 2180{
2272 if (!ip_fib_main_table) 2181 rcu_read_lock();
2273 return NULL; 2182 if (*pos == 0)
2274
2275 if (*pos)
2276 return fib_triestat_get_next(seq);
2277 else
2278 return SEQ_START_TOKEN; 2183 return SEQ_START_TOKEN;
2184 return fib_trie_get_idx(seq->private, *pos - 1);
2279} 2185}
2280 2186
2281static void *fib_triestat_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2187static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2282{ 2188{
2189 struct fib_trie_iter *iter = seq->private;
2190 void *l = v;
2191
2283 ++*pos; 2192 ++*pos;
2284 if (v == SEQ_START_TOKEN) 2193 if (v == SEQ_START_TOKEN)
2285 return fib_triestat_get_first(seq); 2194 return fib_trie_get_idx(iter, 0);
2286 else
2287 return fib_triestat_get_next(seq);
2288}
2289 2195
2290static void fib_triestat_seq_stop(struct seq_file *seq, void *v) 2196 v = fib_trie_get_next(iter);
2291{ 2197 BUG_ON(v == l);
2198 if (v)
2199 return v;
2292 2200
2293} 2201 /* continue scan in next trie */
2202 if (iter->trie == trie_local)
2203 return fib_trie_get_first(iter, trie_main);
2294 2204
2295/* 2205 return NULL;
2296 * This outputs /proc/net/fib_triestats 2206}
2297 *
2298 * It always works in backward compatibility mode.
2299 * The format of the file is not supposed to be changed.
2300 */
2301 2207
2302static void collect_and_show(struct trie *t, struct seq_file *seq) 2208static void fib_trie_seq_stop(struct seq_file *seq, void *v)
2303{ 2209{
2304 int bytes = 0; /* How many bytes are used, a ref is 4 bytes */ 2210 rcu_read_unlock();
2305 int i, max, pointers; 2211}
2306 struct trie_stat *stat;
2307 int avdepth;
2308
2309 stat = trie_collect_stats(t);
2310
2311 bytes = 0;
2312 seq_printf(seq, "trie=%p\n", t);
2313
2314 if (stat) {
2315 if (stat->leaves)
2316 avdepth = stat->totdepth*100 / stat->leaves;
2317 else
2318 avdepth = 0;
2319 seq_printf(seq, "Aver depth: %d.%02d\n", avdepth / 100, avdepth % 100);
2320 seq_printf(seq, "Max depth: %4d\n", stat->maxdepth);
2321 2212
2322 seq_printf(seq, "Leaves: %d\n", stat->leaves); 2213static void seq_indent(struct seq_file *seq, int n)
2323 bytes += sizeof(struct leaf) * stat->leaves; 2214{
2324 seq_printf(seq, "Internal nodes: %d\n", stat->tnodes); 2215 while (n-- > 0) seq_puts(seq, " ");
2325 bytes += sizeof(struct tnode) * stat->tnodes; 2216}
2326 2217
2327 max = MAX_CHILDS-1; 2218static inline const char *rtn_scope(enum rt_scope_t s)
2219{
2220 static char buf[32];
2328 2221
2329 while (max >= 0 && stat->nodesizes[max] == 0) 2222 switch(s) {
2330 max--; 2223 case RT_SCOPE_UNIVERSE: return "universe";
2331 pointers = 0; 2224 case RT_SCOPE_SITE: return "site";
2225 case RT_SCOPE_LINK: return "link";
2226 case RT_SCOPE_HOST: return "host";
2227 case RT_SCOPE_NOWHERE: return "nowhere";
2228 default:
2229 snprintf(buf, sizeof(buf), "scope=%d", s);
2230 return buf;
2231 }
2232}
2332 2233
2333 for (i = 1; i <= max; i++) 2234static const char *rtn_type_names[__RTN_MAX] = {
2334 if (stat->nodesizes[i] != 0) { 2235 [RTN_UNSPEC] = "UNSPEC",
2335 seq_printf(seq, " %d: %d", i, stat->nodesizes[i]); 2236 [RTN_UNICAST] = "UNICAST",
2336 pointers += (1<<i) * stat->nodesizes[i]; 2237 [RTN_LOCAL] = "LOCAL",
2337 } 2238 [RTN_BROADCAST] = "BROADCAST",
2338 seq_printf(seq, "\n"); 2239 [RTN_ANYCAST] = "ANYCAST",
2339 seq_printf(seq, "Pointers: %d\n", pointers); 2240 [RTN_MULTICAST] = "MULTICAST",
2340 bytes += sizeof(struct node *) * pointers; 2241 [RTN_BLACKHOLE] = "BLACKHOLE",
2341 seq_printf(seq, "Null ptrs: %d\n", stat->nullpointers); 2242 [RTN_UNREACHABLE] = "UNREACHABLE",
2342 seq_printf(seq, "Total size: %d kB\n", bytes / 1024); 2243 [RTN_PROHIBIT] = "PROHIBIT",
2244 [RTN_THROW] = "THROW",
2245 [RTN_NAT] = "NAT",
2246 [RTN_XRESOLVE] = "XRESOLVE",
2247};
2343 2248
2344 kfree(stat); 2249static inline const char *rtn_type(unsigned t)
2345 } 2250{
2251 static char buf[32];
2346 2252
2347#ifdef CONFIG_IP_FIB_TRIE_STATS 2253 if (t < __RTN_MAX && rtn_type_names[t])
2348 seq_printf(seq, "Counters:\n---------\n"); 2254 return rtn_type_names[t];
2349 seq_printf(seq,"gets = %d\n", t->stats.gets); 2255 snprintf(buf, sizeof(buf), "type %d", t);
2350 seq_printf(seq,"backtracks = %d\n", t->stats.backtrack); 2256 return buf;
2351 seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed);
2352 seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss);
2353 seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit);
2354 seq_printf(seq,"skipped node resize = %d\n", t->stats.resize_node_skipped);
2355#ifdef CLEAR_STATS
2356 memset(&(t->stats), 0, sizeof(t->stats));
2357#endif
2358#endif /* CONFIG_IP_FIB_TRIE_STATS */
2359} 2257}
2360 2258
2361static int fib_triestat_seq_show(struct seq_file *seq, void *v) 2259/* Pretty print the trie */
2260static int fib_trie_seq_show(struct seq_file *seq, void *v)
2362{ 2261{
2363 char bf[128]; 2262 const struct fib_trie_iter *iter = seq->private;
2263 struct node *n = v;
2364 2264
2365 if (v == SEQ_START_TOKEN) { 2265 if (v == SEQ_START_TOKEN)
2366 seq_printf(seq, "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n", 2266 return 0;
2367 sizeof(struct leaf), sizeof(struct tnode));
2368 if (trie_local)
2369 collect_and_show(trie_local, seq);
2370 2267
2371 if (trie_main) 2268 if (IS_TNODE(n)) {
2372 collect_and_show(trie_main, seq); 2269 struct tnode *tn = (struct tnode *) n;
2373 } else { 2270 t_key prf = ntohl(MASK_PFX(tn->key, tn->pos));
2374 snprintf(bf, sizeof(bf), "*\t%08X\t%08X", 200, 400);
2375 2271
2376 seq_printf(seq, "%-127s\n", bf); 2272 if (!NODE_PARENT(n)) {
2273 if (iter->trie == trie_local)
2274 seq_puts(seq, "<local>:\n");
2275 else
2276 seq_puts(seq, "<main>:\n");
2277 } else {
2278 seq_indent(seq, iter->depth-1);
2279 seq_printf(seq, " +-- %d.%d.%d.%d/%d\n",
2280 NIPQUAD(prf), tn->pos);
2281 }
2282 } else {
2283 struct leaf *l = (struct leaf *) n;
2284 int i;
2285 u32 val = ntohl(l->key);
2286
2287 seq_indent(seq, iter->depth);
2288 seq_printf(seq, " |-- %d.%d.%d.%d\n", NIPQUAD(val));
2289 for (i = 32; i >= 0; i--) {
2290 struct leaf_info *li = find_leaf_info(&l->list, i);
2291 if (li) {
2292 struct fib_alias *fa;
2293 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
2294 seq_indent(seq, iter->depth+1);
2295 seq_printf(seq, " /%d %s %s", i,
2296 rtn_scope(fa->fa_scope),
2297 rtn_type(fa->fa_type));
2298 if (fa->fa_tos)
2299 seq_printf(seq, "tos =%d\n",
2300 fa->fa_tos);
2301 seq_putc(seq, '\n');
2302 }
2303 }
2304 }
2377 } 2305 }
2306
2378 return 0; 2307 return 0;
2379} 2308}
2380 2309
2381static struct seq_operations fib_triestat_seq_ops = { 2310static struct seq_operations fib_trie_seq_ops = {
2382 .start = fib_triestat_seq_start, 2311 .start = fib_trie_seq_start,
2383 .next = fib_triestat_seq_next, 2312 .next = fib_trie_seq_next,
2384 .stop = fib_triestat_seq_stop, 2313 .stop = fib_trie_seq_stop,
2385 .show = fib_triestat_seq_show, 2314 .show = fib_trie_seq_show,
2386}; 2315};
2387 2316
2388static int fib_triestat_seq_open(struct inode *inode, struct file *file) 2317static int fib_trie_seq_open(struct inode *inode, struct file *file)
2389{ 2318{
2390 struct seq_file *seq; 2319 struct seq_file *seq;
2391 int rc = -ENOMEM; 2320 int rc = -ENOMEM;
2321 struct fib_trie_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
2392 2322
2393 rc = seq_open(file, &fib_triestat_seq_ops); 2323 if (!s)
2324 goto out;
2325
2326 rc = seq_open(file, &fib_trie_seq_ops);
2394 if (rc) 2327 if (rc)
2395 goto out_kfree; 2328 goto out_kfree;
2396 2329
2397 seq = file->private_data; 2330 seq = file->private_data;
2331 seq->private = s;
2332 memset(s, 0, sizeof(*s));
2398out: 2333out:
2399 return rc; 2334 return rc;
2400out_kfree: 2335out_kfree:
2336 kfree(s);
2401 goto out; 2337 goto out;
2402} 2338}
2403 2339
2404static struct file_operations fib_triestat_seq_fops = { 2340static struct file_operations fib_trie_fops = {
2405 .owner = THIS_MODULE, 2341 .owner = THIS_MODULE,
2406 .open = fib_triestat_seq_open, 2342 .open = fib_trie_seq_open,
2407 .read = seq_read, 2343 .read = seq_read,
2408 .llseek = seq_lseek, 2344 .llseek = seq_lseek,
2409 .release = seq_release_private, 2345 .release = seq_release_private,
2410}; 2346};
2411 2347
2412int __init fib_stat_proc_init(void) 2348static unsigned fib_flag_trans(int type, u32 mask, const struct fib_info *fi)
2413{
2414 if (!proc_net_fops_create("fib_triestat", S_IRUGO, &fib_triestat_seq_fops))
2415 return -ENOMEM;
2416 return 0;
2417}
2418
2419void __init fib_stat_proc_exit(void)
2420{ 2349{
2421 proc_net_remove("fib_triestat"); 2350 static unsigned type2flags[RTN_MAX + 1] = {
2422} 2351 [7] = RTF_REJECT, [8] = RTF_REJECT,
2352 };
2353 unsigned flags = type2flags[type];
2423 2354
2424static struct fib_alias *fib_trie_get_first(struct seq_file *seq) 2355 if (fi && fi->fib_nh->nh_gw)
2425{ 2356 flags |= RTF_GATEWAY;
2426 return NULL; 2357 if (mask == 0xFFFFFFFF)
2358 flags |= RTF_HOST;
2359 flags |= RTF_UP;
2360 return flags;
2427} 2361}
2428 2362
2429static struct fib_alias *fib_trie_get_next(struct seq_file *seq) 2363/*
2364 * This outputs /proc/net/route.
2365 * The format of the file is not supposed to be changed
2366 * and needs to be same as fib_hash output to avoid breaking
2367 * legacy utilities
2368 */
2369static int fib_route_seq_show(struct seq_file *seq, void *v)
2430{ 2370{
2431 return NULL; 2371 struct leaf *l = v;
2432} 2372 int i;
2373 char bf[128];
2433 2374
2434static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos) 2375 if (v == SEQ_START_TOKEN) {
2435{ 2376 seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
2436 if (!ip_fib_main_table) 2377 "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
2437 return NULL; 2378 "\tWindow\tIRTT");
2379 return 0;
2380 }
2438 2381
2439 if (*pos) 2382 if (IS_TNODE(l))
2440 return fib_trie_get_next(seq); 2383 return 0;
2441 else
2442 return SEQ_START_TOKEN;
2443}
2444 2384
2445static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2385 for (i=32; i>=0; i--) {
2446{ 2386 struct leaf_info *li = find_leaf_info(&l->list, i);
2447 ++*pos; 2387 struct fib_alias *fa;
2448 if (v == SEQ_START_TOKEN) 2388 u32 mask, prefix;
2449 return fib_trie_get_first(seq);
2450 else
2451 return fib_trie_get_next(seq);
2452 2389
2453} 2390 if (!li)
2391 continue;
2454 2392
2455static void fib_trie_seq_stop(struct seq_file *seq, void *v) 2393 mask = inet_make_mask(li->plen);
2456{ 2394 prefix = htonl(l->key);
2457}
2458 2395
2459/* 2396 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
2460 * This outputs /proc/net/fib_trie. 2397 const struct fib_info *fi = rcu_dereference(fa->fa_info);
2461 * 2398 unsigned flags = fib_flag_trans(fa->fa_type, mask, fi);
2462 * It always works in backward compatibility mode.
2463 * The format of the file is not supposed to be changed.
2464 */
2465 2399
2466static int fib_trie_seq_show(struct seq_file *seq, void *v) 2400 if (fa->fa_type == RTN_BROADCAST
2467{ 2401 || fa->fa_type == RTN_MULTICAST)
2468 char bf[128]; 2402 continue;
2469 2403
2470 if (v == SEQ_START_TOKEN) { 2404 if (fi)
2471 if (trie_local) 2405 snprintf(bf, sizeof(bf),
2472 trie_dump_seq(seq, trie_local); 2406 "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
2407 fi->fib_dev ? fi->fib_dev->name : "*",
2408 prefix,
2409 fi->fib_nh->nh_gw, flags, 0, 0,
2410 fi->fib_priority,
2411 mask,
2412 (fi->fib_advmss ? fi->fib_advmss + 40 : 0),
2413 fi->fib_window,
2414 fi->fib_rtt >> 3);
2415 else
2416 snprintf(bf, sizeof(bf),
2417 "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
2418 prefix, 0, flags, 0, 0, 0,
2419 mask, 0, 0, 0);
2473 2420
2474 if (trie_main) 2421 seq_printf(seq, "%-127s\n", bf);
2475 trie_dump_seq(seq, trie_main); 2422 }
2476 } else {
2477 snprintf(bf, sizeof(bf),
2478 "*\t%08X\t%08X", 200, 400);
2479 seq_printf(seq, "%-127s\n", bf);
2480 } 2423 }
2481 2424
2482 return 0; 2425 return 0;
2483} 2426}
2484 2427
2485static struct seq_operations fib_trie_seq_ops = { 2428static struct seq_operations fib_route_seq_ops = {
2486 .start = fib_trie_seq_start, 2429 .start = fib_trie_seq_start,
2487 .next = fib_trie_seq_next, 2430 .next = fib_trie_seq_next,
2488 .stop = fib_trie_seq_stop, 2431 .stop = fib_trie_seq_stop,
2489 .show = fib_trie_seq_show, 2432 .show = fib_route_seq_show,
2490}; 2433};
2491 2434
2492static int fib_trie_seq_open(struct inode *inode, struct file *file) 2435static int fib_route_seq_open(struct inode *inode, struct file *file)
2493{ 2436{
2494 struct seq_file *seq; 2437 struct seq_file *seq;
2495 int rc = -ENOMEM; 2438 int rc = -ENOMEM;
2439 struct fib_trie_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
2496 2440
2497 rc = seq_open(file, &fib_trie_seq_ops); 2441 if (!s)
2442 goto out;
2443
2444 rc = seq_open(file, &fib_route_seq_ops);
2498 if (rc) 2445 if (rc)
2499 goto out_kfree; 2446 goto out_kfree;
2500 2447
2501 seq = file->private_data; 2448 seq = file->private_data;
2449 seq->private = s;
2450 memset(s, 0, sizeof(*s));
2502out: 2451out:
2503 return rc; 2452 return rc;
2504out_kfree: 2453out_kfree:
2454 kfree(s);
2505 goto out; 2455 goto out;
2506} 2456}
2507 2457
2508static struct file_operations fib_trie_seq_fops = { 2458static struct file_operations fib_route_fops = {
2509 .owner = THIS_MODULE, 2459 .owner = THIS_MODULE,
2510 .open = fib_trie_seq_open, 2460 .open = fib_route_seq_open,
2511 .read = seq_read, 2461 .read = seq_read,
2512 .llseek = seq_lseek, 2462 .llseek = seq_lseek,
2513 .release= seq_release_private, 2463 .release = seq_release_private,
2514}; 2464};
2515 2465
2516int __init fib_proc_init(void) 2466int __init fib_proc_init(void)
2517{ 2467{
2518 if (!proc_net_fops_create("fib_trie", S_IRUGO, &fib_trie_seq_fops)) 2468 if (!proc_net_fops_create("fib_trie", S_IRUGO, &fib_trie_fops))
2519 return -ENOMEM; 2469 goto out1;
2470
2471 if (!proc_net_fops_create("fib_triestat", S_IRUGO, &fib_triestat_fops))
2472 goto out2;
2473
2474 if (!proc_net_fops_create("route", S_IRUGO, &fib_route_fops))
2475 goto out3;
2476
2520 return 0; 2477 return 0;
2478
2479out3:
2480 proc_net_remove("fib_triestat");
2481out2:
2482 proc_net_remove("fib_trie");
2483out1:
2484 return -ENOMEM;
2521} 2485}
2522 2486
2523void __init fib_proc_exit(void) 2487void __init fib_proc_exit(void)
2524{ 2488{
2525 proc_net_remove("fib_trie"); 2489 proc_net_remove("fib_trie");
2490 proc_net_remove("fib_triestat");
2491 proc_net_remove("route");
2526} 2492}
2527 2493
2528#endif /* CONFIG_PROC_FS */ 2494#endif /* CONFIG_PROC_FS */