diff options
232 files changed, 6891 insertions, 3091 deletions
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index b959659c5df4..ccb6048415b2 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt | |||
@@ -603,3 +603,10 @@ Why: The adm9240, w83792d and w83793 hardware monitoring drivers have | |||
603 | Who: Jean Delvare <khali@linux-fr.org> | 603 | Who: Jean Delvare <khali@linux-fr.org> |
604 | 604 | ||
605 | ---------------------------- | 605 | ---------------------------- |
606 | |||
607 | What: xt_connlimit rev 0 | ||
608 | When: 2012 | ||
609 | Who: Jan Engelhardt <jengelh@medozas.de> | ||
610 | Files: net/netfilter/xt_connlimit.c | ||
611 | |||
612 | ---------------------------- | ||
diff --git a/drivers/net/atl1c/atl1c_hw.c b/drivers/net/atl1c/atl1c_hw.c index 1bf672009948..23f2ab0f2fa8 100644 --- a/drivers/net/atl1c/atl1c_hw.c +++ b/drivers/net/atl1c/atl1c_hw.c | |||
@@ -345,7 +345,7 @@ int atl1c_write_phy_reg(struct atl1c_hw *hw, u32 reg_addr, u16 phy_data) | |||
345 | */ | 345 | */ |
346 | static int atl1c_phy_setup_adv(struct atl1c_hw *hw) | 346 | static int atl1c_phy_setup_adv(struct atl1c_hw *hw) |
347 | { | 347 | { |
348 | u16 mii_adv_data = ADVERTISE_DEFAULT_CAP & ~ADVERTISE_SPEED_MASK; | 348 | u16 mii_adv_data = ADVERTISE_DEFAULT_CAP & ~ADVERTISE_ALL; |
349 | u16 mii_giga_ctrl_data = GIGA_CR_1000T_DEFAULT_CAP & | 349 | u16 mii_giga_ctrl_data = GIGA_CR_1000T_DEFAULT_CAP & |
350 | ~GIGA_CR_1000T_SPEED_MASK; | 350 | ~GIGA_CR_1000T_SPEED_MASK; |
351 | 351 | ||
@@ -373,7 +373,7 @@ static int atl1c_phy_setup_adv(struct atl1c_hw *hw) | |||
373 | } | 373 | } |
374 | 374 | ||
375 | if (atl1c_write_phy_reg(hw, MII_ADVERTISE, mii_adv_data) != 0 || | 375 | if (atl1c_write_phy_reg(hw, MII_ADVERTISE, mii_adv_data) != 0 || |
376 | atl1c_write_phy_reg(hw, MII_GIGA_CR, mii_giga_ctrl_data) != 0) | 376 | atl1c_write_phy_reg(hw, MII_CTRL1000, mii_giga_ctrl_data) != 0) |
377 | return -1; | 377 | return -1; |
378 | return 0; | 378 | return 0; |
379 | } | 379 | } |
@@ -517,19 +517,18 @@ int atl1c_phy_init(struct atl1c_hw *hw) | |||
517 | "Error Setting up Auto-Negotiation\n"); | 517 | "Error Setting up Auto-Negotiation\n"); |
518 | return ret_val; | 518 | return ret_val; |
519 | } | 519 | } |
520 | mii_bmcr_data |= BMCR_AUTO_NEG_EN | BMCR_RESTART_AUTO_NEG; | 520 | mii_bmcr_data |= BMCR_ANENABLE | BMCR_ANRESTART; |
521 | break; | 521 | break; |
522 | case MEDIA_TYPE_100M_FULL: | 522 | case MEDIA_TYPE_100M_FULL: |
523 | mii_bmcr_data |= BMCR_SPEED_100 | BMCR_FULL_DUPLEX; | 523 | mii_bmcr_data |= BMCR_SPEED100 | BMCR_FULLDPLX; |
524 | break; | 524 | break; |
525 | case MEDIA_TYPE_100M_HALF: | 525 | case MEDIA_TYPE_100M_HALF: |
526 | mii_bmcr_data |= BMCR_SPEED_100; | 526 | mii_bmcr_data |= BMCR_SPEED100; |
527 | break; | 527 | break; |
528 | case MEDIA_TYPE_10M_FULL: | 528 | case MEDIA_TYPE_10M_FULL: |
529 | mii_bmcr_data |= BMCR_SPEED_10 | BMCR_FULL_DUPLEX; | 529 | mii_bmcr_data |= BMCR_FULLDPLX; |
530 | break; | 530 | break; |
531 | case MEDIA_TYPE_10M_HALF: | 531 | case MEDIA_TYPE_10M_HALF: |
532 | mii_bmcr_data |= BMCR_SPEED_10; | ||
533 | break; | 532 | break; |
534 | default: | 533 | default: |
535 | if (netif_msg_link(adapter)) | 534 | if (netif_msg_link(adapter)) |
@@ -657,7 +656,7 @@ int atl1c_restart_autoneg(struct atl1c_hw *hw) | |||
657 | err = atl1c_phy_setup_adv(hw); | 656 | err = atl1c_phy_setup_adv(hw); |
658 | if (err) | 657 | if (err) |
659 | return err; | 658 | return err; |
660 | mii_bmcr_data |= BMCR_AUTO_NEG_EN | BMCR_RESTART_AUTO_NEG; | 659 | mii_bmcr_data |= BMCR_ANENABLE | BMCR_ANRESTART; |
661 | 660 | ||
662 | return atl1c_write_phy_reg(hw, MII_BMCR, mii_bmcr_data); | 661 | return atl1c_write_phy_reg(hw, MII_BMCR, mii_bmcr_data); |
663 | } | 662 | } |
diff --git a/drivers/net/atl1c/atl1c_hw.h b/drivers/net/atl1c/atl1c_hw.h index 3dd675979aa1..655fc6c4a8a4 100644 --- a/drivers/net/atl1c/atl1c_hw.h +++ b/drivers/net/atl1c/atl1c_hw.h | |||
@@ -736,55 +736,16 @@ int atl1c_phy_power_saving(struct atl1c_hw *hw); | |||
736 | #define REG_DEBUG_DATA0 0x1900 | 736 | #define REG_DEBUG_DATA0 0x1900 |
737 | #define REG_DEBUG_DATA1 0x1904 | 737 | #define REG_DEBUG_DATA1 0x1904 |
738 | 738 | ||
739 | /* PHY Control Register */ | ||
740 | #define MII_BMCR 0x00 | ||
741 | #define BMCR_SPEED_SELECT_MSB 0x0040 /* bits 6,13: 10=1000, 01=100, 00=10 */ | ||
742 | #define BMCR_COLL_TEST_ENABLE 0x0080 /* Collision test enable */ | ||
743 | #define BMCR_FULL_DUPLEX 0x0100 /* FDX =1, half duplex =0 */ | ||
744 | #define BMCR_RESTART_AUTO_NEG 0x0200 /* Restart auto negotiation */ | ||
745 | #define BMCR_ISOLATE 0x0400 /* Isolate PHY from MII */ | ||
746 | #define BMCR_POWER_DOWN 0x0800 /* Power down */ | ||
747 | #define BMCR_AUTO_NEG_EN 0x1000 /* Auto Neg Enable */ | ||
748 | #define BMCR_SPEED_SELECT_LSB 0x2000 /* bits 6,13: 10=1000, 01=100, 00=10 */ | ||
749 | #define BMCR_LOOPBACK 0x4000 /* 0 = normal, 1 = loopback */ | ||
750 | #define BMCR_RESET 0x8000 /* 0 = normal, 1 = PHY reset */ | ||
751 | #define BMCR_SPEED_MASK 0x2040 | ||
752 | #define BMCR_SPEED_1000 0x0040 | ||
753 | #define BMCR_SPEED_100 0x2000 | ||
754 | #define BMCR_SPEED_10 0x0000 | ||
755 | |||
756 | /* PHY Status Register */ | ||
757 | #define MII_BMSR 0x01 | ||
758 | #define BMMSR_EXTENDED_CAPS 0x0001 /* Extended register capabilities */ | ||
759 | #define BMSR_JABBER_DETECT 0x0002 /* Jabber Detected */ | ||
760 | #define BMSR_LINK_STATUS 0x0004 /* Link Status 1 = link */ | ||
761 | #define BMSR_AUTONEG_CAPS 0x0008 /* Auto Neg Capable */ | ||
762 | #define BMSR_REMOTE_FAULT 0x0010 /* Remote Fault Detect */ | ||
763 | #define BMSR_AUTONEG_COMPLETE 0x0020 /* Auto Neg Complete */ | ||
764 | #define BMSR_PREAMBLE_SUPPRESS 0x0040 /* Preamble may be suppressed */ | ||
765 | #define BMSR_EXTENDED_STATUS 0x0100 /* Ext. status info in Reg 0x0F */ | ||
766 | #define BMSR_100T2_HD_CAPS 0x0200 /* 100T2 Half Duplex Capable */ | ||
767 | #define BMSR_100T2_FD_CAPS 0x0400 /* 100T2 Full Duplex Capable */ | ||
768 | #define BMSR_10T_HD_CAPS 0x0800 /* 10T Half Duplex Capable */ | ||
769 | #define BMSR_10T_FD_CAPS 0x1000 /* 10T Full Duplex Capable */ | ||
770 | #define BMSR_100X_HD_CAPS 0x2000 /* 100X Half Duplex Capable */ | ||
771 | #define BMMII_SR_100X_FD_CAPS 0x4000 /* 100X Full Duplex Capable */ | ||
772 | #define BMMII_SR_100T4_CAPS 0x8000 /* 100T4 Capable */ | ||
773 | |||
774 | #define MII_PHYSID1 0x02 | ||
775 | #define MII_PHYSID2 0x03 | ||
776 | #define L1D_MPW_PHYID1 0xD01C /* V7 */ | 739 | #define L1D_MPW_PHYID1 0xD01C /* V7 */ |
777 | #define L1D_MPW_PHYID2 0xD01D /* V1-V6 */ | 740 | #define L1D_MPW_PHYID2 0xD01D /* V1-V6 */ |
778 | #define L1D_MPW_PHYID3 0xD01E /* V8 */ | 741 | #define L1D_MPW_PHYID3 0xD01E /* V8 */ |
779 | 742 | ||
780 | 743 | ||
781 | /* Autoneg Advertisement Register */ | 744 | /* Autoneg Advertisement Register */ |
782 | #define MII_ADVERTISE 0x04 | 745 | #define ADVERTISE_DEFAULT_CAP \ |
783 | #define ADVERTISE_SPEED_MASK 0x01E0 | 746 | (ADVERTISE_ALL | ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM) |
784 | #define ADVERTISE_DEFAULT_CAP 0x0DE0 | ||
785 | 747 | ||
786 | /* 1000BASE-T Control Register */ | 748 | /* 1000BASE-T Control Register */ |
787 | #define MII_GIGA_CR 0x09 | ||
788 | #define GIGA_CR_1000T_REPEATER_DTE 0x0400 /* 1=Repeater/switch device port 0=DTE device */ | 749 | #define GIGA_CR_1000T_REPEATER_DTE 0x0400 /* 1=Repeater/switch device port 0=DTE device */ |
789 | 750 | ||
790 | #define GIGA_CR_1000T_MS_VALUE 0x0800 /* 1=Configure PHY as Master 0=Configure PHY as Slave */ | 751 | #define GIGA_CR_1000T_MS_VALUE 0x0800 /* 1=Configure PHY as Master 0=Configure PHY as Slave */ |
diff --git a/drivers/net/atl1e/atl1e_ethtool.c b/drivers/net/atl1e/atl1e_ethtool.c index 6943a6c3b948..1209297433b8 100644 --- a/drivers/net/atl1e/atl1e_ethtool.c +++ b/drivers/net/atl1e/atl1e_ethtool.c | |||
@@ -95,18 +95,18 @@ static int atl1e_set_settings(struct net_device *netdev, | |||
95 | ecmd->advertising = hw->autoneg_advertised | | 95 | ecmd->advertising = hw->autoneg_advertised | |
96 | ADVERTISED_TP | ADVERTISED_Autoneg; | 96 | ADVERTISED_TP | ADVERTISED_Autoneg; |
97 | 97 | ||
98 | adv4 = hw->mii_autoneg_adv_reg & ~MII_AR_SPEED_MASK; | 98 | adv4 = hw->mii_autoneg_adv_reg & ~ADVERTISE_ALL; |
99 | adv9 = hw->mii_1000t_ctrl_reg & ~MII_AT001_CR_1000T_SPEED_MASK; | 99 | adv9 = hw->mii_1000t_ctrl_reg & ~MII_AT001_CR_1000T_SPEED_MASK; |
100 | if (hw->autoneg_advertised & ADVERTISE_10_HALF) | 100 | if (hw->autoneg_advertised & ADVERTISE_10_HALF) |
101 | adv4 |= MII_AR_10T_HD_CAPS; | 101 | adv4 |= ADVERTISE_10HALF; |
102 | if (hw->autoneg_advertised & ADVERTISE_10_FULL) | 102 | if (hw->autoneg_advertised & ADVERTISE_10_FULL) |
103 | adv4 |= MII_AR_10T_FD_CAPS; | 103 | adv4 |= ADVERTISE_10FULL; |
104 | if (hw->autoneg_advertised & ADVERTISE_100_HALF) | 104 | if (hw->autoneg_advertised & ADVERTISE_100_HALF) |
105 | adv4 |= MII_AR_100TX_HD_CAPS; | 105 | adv4 |= ADVERTISE_100HALF; |
106 | if (hw->autoneg_advertised & ADVERTISE_100_FULL) | 106 | if (hw->autoneg_advertised & ADVERTISE_100_FULL) |
107 | adv4 |= MII_AR_100TX_FD_CAPS; | 107 | adv4 |= ADVERTISE_100FULL; |
108 | if (hw->autoneg_advertised & ADVERTISE_1000_FULL) | 108 | if (hw->autoneg_advertised & ADVERTISE_1000_FULL) |
109 | adv9 |= MII_AT001_CR_1000T_FD_CAPS; | 109 | adv9 |= ADVERTISE_1000FULL; |
110 | 110 | ||
111 | if (adv4 != hw->mii_autoneg_adv_reg || | 111 | if (adv4 != hw->mii_autoneg_adv_reg || |
112 | adv9 != hw->mii_1000t_ctrl_reg) { | 112 | adv9 != hw->mii_1000t_ctrl_reg) { |
diff --git a/drivers/net/atl1e/atl1e_hw.c b/drivers/net/atl1e/atl1e_hw.c index 76cc043def8c..923063d2e5bb 100644 --- a/drivers/net/atl1e/atl1e_hw.c +++ b/drivers/net/atl1e/atl1e_hw.c | |||
@@ -318,7 +318,7 @@ static int atl1e_phy_setup_autoneg_adv(struct atl1e_hw *hw) | |||
318 | * Advertisement Register (Address 4) and the 1000 mb speed bits in | 318 | * Advertisement Register (Address 4) and the 1000 mb speed bits in |
319 | * the 1000Base-T control Register (Address 9). | 319 | * the 1000Base-T control Register (Address 9). |
320 | */ | 320 | */ |
321 | mii_autoneg_adv_reg &= ~MII_AR_SPEED_MASK; | 321 | mii_autoneg_adv_reg &= ~ADVERTISE_ALL; |
322 | mii_1000t_ctrl_reg &= ~MII_AT001_CR_1000T_SPEED_MASK; | 322 | mii_1000t_ctrl_reg &= ~MII_AT001_CR_1000T_SPEED_MASK; |
323 | 323 | ||
324 | /* | 324 | /* |
@@ -327,44 +327,37 @@ static int atl1e_phy_setup_autoneg_adv(struct atl1e_hw *hw) | |||
327 | */ | 327 | */ |
328 | switch (hw->media_type) { | 328 | switch (hw->media_type) { |
329 | case MEDIA_TYPE_AUTO_SENSOR: | 329 | case MEDIA_TYPE_AUTO_SENSOR: |
330 | mii_autoneg_adv_reg |= (MII_AR_10T_HD_CAPS | | 330 | mii_autoneg_adv_reg |= ADVERTISE_ALL; |
331 | MII_AR_10T_FD_CAPS | | 331 | hw->autoneg_advertised = ADVERTISE_ALL; |
332 | MII_AR_100TX_HD_CAPS | | ||
333 | MII_AR_100TX_FD_CAPS); | ||
334 | hw->autoneg_advertised = ADVERTISE_10_HALF | | ||
335 | ADVERTISE_10_FULL | | ||
336 | ADVERTISE_100_HALF | | ||
337 | ADVERTISE_100_FULL; | ||
338 | if (hw->nic_type == athr_l1e) { | 332 | if (hw->nic_type == athr_l1e) { |
339 | mii_1000t_ctrl_reg |= | 333 | mii_1000t_ctrl_reg |= ADVERTISE_1000FULL; |
340 | MII_AT001_CR_1000T_FD_CAPS; | ||
341 | hw->autoneg_advertised |= ADVERTISE_1000_FULL; | 334 | hw->autoneg_advertised |= ADVERTISE_1000_FULL; |
342 | } | 335 | } |
343 | break; | 336 | break; |
344 | 337 | ||
345 | case MEDIA_TYPE_100M_FULL: | 338 | case MEDIA_TYPE_100M_FULL: |
346 | mii_autoneg_adv_reg |= MII_AR_100TX_FD_CAPS; | 339 | mii_autoneg_adv_reg |= ADVERTISE_100FULL; |
347 | hw->autoneg_advertised = ADVERTISE_100_FULL; | 340 | hw->autoneg_advertised = ADVERTISE_100_FULL; |
348 | break; | 341 | break; |
349 | 342 | ||
350 | case MEDIA_TYPE_100M_HALF: | 343 | case MEDIA_TYPE_100M_HALF: |
351 | mii_autoneg_adv_reg |= MII_AR_100TX_HD_CAPS; | 344 | mii_autoneg_adv_reg |= ADVERTISE_100_HALF; |
352 | hw->autoneg_advertised = ADVERTISE_100_HALF; | 345 | hw->autoneg_advertised = ADVERTISE_100_HALF; |
353 | break; | 346 | break; |
354 | 347 | ||
355 | case MEDIA_TYPE_10M_FULL: | 348 | case MEDIA_TYPE_10M_FULL: |
356 | mii_autoneg_adv_reg |= MII_AR_10T_FD_CAPS; | 349 | mii_autoneg_adv_reg |= ADVERTISE_10_FULL; |
357 | hw->autoneg_advertised = ADVERTISE_10_FULL; | 350 | hw->autoneg_advertised = ADVERTISE_10_FULL; |
358 | break; | 351 | break; |
359 | 352 | ||
360 | default: | 353 | default: |
361 | mii_autoneg_adv_reg |= MII_AR_10T_HD_CAPS; | 354 | mii_autoneg_adv_reg |= ADVERTISE_10_HALF; |
362 | hw->autoneg_advertised = ADVERTISE_10_HALF; | 355 | hw->autoneg_advertised = ADVERTISE_10_HALF; |
363 | break; | 356 | break; |
364 | } | 357 | } |
365 | 358 | ||
366 | /* flow control fixed to enable all */ | 359 | /* flow control fixed to enable all */ |
367 | mii_autoneg_adv_reg |= (MII_AR_ASM_DIR | MII_AR_PAUSE); | 360 | mii_autoneg_adv_reg |= (ADVERTISE_PAUSE_ASYM | ADVERTISE_PAUSE_CAP); |
368 | 361 | ||
369 | hw->mii_autoneg_adv_reg = mii_autoneg_adv_reg; | 362 | hw->mii_autoneg_adv_reg = mii_autoneg_adv_reg; |
370 | hw->mii_1000t_ctrl_reg = mii_1000t_ctrl_reg; | 363 | hw->mii_1000t_ctrl_reg = mii_1000t_ctrl_reg; |
@@ -374,7 +367,7 @@ static int atl1e_phy_setup_autoneg_adv(struct atl1e_hw *hw) | |||
374 | return ret_val; | 367 | return ret_val; |
375 | 368 | ||
376 | if (hw->nic_type == athr_l1e || hw->nic_type == athr_l2e_revA) { | 369 | if (hw->nic_type == athr_l1e || hw->nic_type == athr_l2e_revA) { |
377 | ret_val = atl1e_write_phy_reg(hw, MII_AT001_CR, | 370 | ret_val = atl1e_write_phy_reg(hw, MII_CTRL1000, |
378 | mii_1000t_ctrl_reg); | 371 | mii_1000t_ctrl_reg); |
379 | if (ret_val) | 372 | if (ret_val) |
380 | return ret_val; | 373 | return ret_val; |
@@ -397,7 +390,7 @@ int atl1e_phy_commit(struct atl1e_hw *hw) | |||
397 | int ret_val; | 390 | int ret_val; |
398 | u16 phy_data; | 391 | u16 phy_data; |
399 | 392 | ||
400 | phy_data = MII_CR_RESET | MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG; | 393 | phy_data = BMCR_RESET | BMCR_ANENABLE | BMCR_ANRESTART; |
401 | 394 | ||
402 | ret_val = atl1e_write_phy_reg(hw, MII_BMCR, phy_data); | 395 | ret_val = atl1e_write_phy_reg(hw, MII_BMCR, phy_data); |
403 | if (ret_val) { | 396 | if (ret_val) { |
@@ -645,15 +638,14 @@ int atl1e_restart_autoneg(struct atl1e_hw *hw) | |||
645 | return err; | 638 | return err; |
646 | 639 | ||
647 | if (hw->nic_type == athr_l1e || hw->nic_type == athr_l2e_revA) { | 640 | if (hw->nic_type == athr_l1e || hw->nic_type == athr_l2e_revA) { |
648 | err = atl1e_write_phy_reg(hw, MII_AT001_CR, | 641 | err = atl1e_write_phy_reg(hw, MII_CTRL1000, |
649 | hw->mii_1000t_ctrl_reg); | 642 | hw->mii_1000t_ctrl_reg); |
650 | if (err) | 643 | if (err) |
651 | return err; | 644 | return err; |
652 | } | 645 | } |
653 | 646 | ||
654 | err = atl1e_write_phy_reg(hw, MII_BMCR, | 647 | err = atl1e_write_phy_reg(hw, MII_BMCR, |
655 | MII_CR_RESET | MII_CR_AUTO_NEG_EN | | 648 | BMCR_RESET | BMCR_ANENABLE | BMCR_ANRESTART); |
656 | MII_CR_RESTART_AUTO_NEG); | ||
657 | return err; | 649 | return err; |
658 | } | 650 | } |
659 | 651 | ||
diff --git a/drivers/net/atl1e/atl1e_hw.h b/drivers/net/atl1e/atl1e_hw.h index 5ea2f4d86cfa..74df16aef793 100644 --- a/drivers/net/atl1e/atl1e_hw.h +++ b/drivers/net/atl1e/atl1e_hw.h | |||
@@ -629,127 +629,24 @@ s32 atl1e_restart_autoneg(struct atl1e_hw *hw); | |||
629 | 629 | ||
630 | /***************************** MII definition ***************************************/ | 630 | /***************************** MII definition ***************************************/ |
631 | /* PHY Common Register */ | 631 | /* PHY Common Register */ |
632 | #define MII_BMCR 0x00 | ||
633 | #define MII_BMSR 0x01 | ||
634 | #define MII_PHYSID1 0x02 | ||
635 | #define MII_PHYSID2 0x03 | ||
636 | #define MII_ADVERTISE 0x04 | ||
637 | #define MII_LPA 0x05 | ||
638 | #define MII_EXPANSION 0x06 | ||
639 | #define MII_AT001_CR 0x09 | ||
640 | #define MII_AT001_SR 0x0A | ||
641 | #define MII_AT001_ESR 0x0F | ||
642 | #define MII_AT001_PSCR 0x10 | 632 | #define MII_AT001_PSCR 0x10 |
643 | #define MII_AT001_PSSR 0x11 | 633 | #define MII_AT001_PSSR 0x11 |
644 | #define MII_INT_CTRL 0x12 | 634 | #define MII_INT_CTRL 0x12 |
645 | #define MII_INT_STATUS 0x13 | 635 | #define MII_INT_STATUS 0x13 |
646 | #define MII_SMARTSPEED 0x14 | 636 | #define MII_SMARTSPEED 0x14 |
647 | #define MII_RERRCOUNTER 0x15 | ||
648 | #define MII_SREVISION 0x16 | ||
649 | #define MII_RESV1 0x17 | ||
650 | #define MII_LBRERROR 0x18 | 637 | #define MII_LBRERROR 0x18 |
651 | #define MII_PHYADDR 0x19 | ||
652 | #define MII_RESV2 0x1a | 638 | #define MII_RESV2 0x1a |
653 | #define MII_TPISTATUS 0x1b | ||
654 | #define MII_NCONFIG 0x1c | ||
655 | 639 | ||
656 | #define MII_DBG_ADDR 0x1D | 640 | #define MII_DBG_ADDR 0x1D |
657 | #define MII_DBG_DATA 0x1E | 641 | #define MII_DBG_DATA 0x1E |
658 | 642 | ||
659 | |||
660 | /* PHY Control Register */ | ||
661 | #define MII_CR_SPEED_SELECT_MSB 0x0040 /* bits 6,13: 10=1000, 01=100, 00=10 */ | ||
662 | #define MII_CR_COLL_TEST_ENABLE 0x0080 /* Collision test enable */ | ||
663 | #define MII_CR_FULL_DUPLEX 0x0100 /* FDX =1, half duplex =0 */ | ||
664 | #define MII_CR_RESTART_AUTO_NEG 0x0200 /* Restart auto negotiation */ | ||
665 | #define MII_CR_ISOLATE 0x0400 /* Isolate PHY from MII */ | ||
666 | #define MII_CR_POWER_DOWN 0x0800 /* Power down */ | ||
667 | #define MII_CR_AUTO_NEG_EN 0x1000 /* Auto Neg Enable */ | ||
668 | #define MII_CR_SPEED_SELECT_LSB 0x2000 /* bits 6,13: 10=1000, 01=100, 00=10 */ | ||
669 | #define MII_CR_LOOPBACK 0x4000 /* 0 = normal, 1 = loopback */ | ||
670 | #define MII_CR_RESET 0x8000 /* 0 = normal, 1 = PHY reset */ | ||
671 | #define MII_CR_SPEED_MASK 0x2040 | ||
672 | #define MII_CR_SPEED_1000 0x0040 | ||
673 | #define MII_CR_SPEED_100 0x2000 | ||
674 | #define MII_CR_SPEED_10 0x0000 | ||
675 | |||
676 | |||
677 | /* PHY Status Register */ | ||
678 | #define MII_SR_EXTENDED_CAPS 0x0001 /* Extended register capabilities */ | ||
679 | #define MII_SR_JABBER_DETECT 0x0002 /* Jabber Detected */ | ||
680 | #define MII_SR_LINK_STATUS 0x0004 /* Link Status 1 = link */ | ||
681 | #define MII_SR_AUTONEG_CAPS 0x0008 /* Auto Neg Capable */ | ||
682 | #define MII_SR_REMOTE_FAULT 0x0010 /* Remote Fault Detect */ | ||
683 | #define MII_SR_AUTONEG_COMPLETE 0x0020 /* Auto Neg Complete */ | ||
684 | #define MII_SR_PREAMBLE_SUPPRESS 0x0040 /* Preamble may be suppressed */ | ||
685 | #define MII_SR_EXTENDED_STATUS 0x0100 /* Ext. status info in Reg 0x0F */ | ||
686 | #define MII_SR_100T2_HD_CAPS 0x0200 /* 100T2 Half Duplex Capable */ | ||
687 | #define MII_SR_100T2_FD_CAPS 0x0400 /* 100T2 Full Duplex Capable */ | ||
688 | #define MII_SR_10T_HD_CAPS 0x0800 /* 10T Half Duplex Capable */ | ||
689 | #define MII_SR_10T_FD_CAPS 0x1000 /* 10T Full Duplex Capable */ | ||
690 | #define MII_SR_100X_HD_CAPS 0x2000 /* 100X Half Duplex Capable */ | ||
691 | #define MII_SR_100X_FD_CAPS 0x4000 /* 100X Full Duplex Capable */ | ||
692 | #define MII_SR_100T4_CAPS 0x8000 /* 100T4 Capable */ | ||
693 | |||
694 | /* Link partner ability register. */ | ||
695 | #define MII_LPA_SLCT 0x001f /* Same as advertise selector */ | ||
696 | #define MII_LPA_10HALF 0x0020 /* Can do 10mbps half-duplex */ | ||
697 | #define MII_LPA_10FULL 0x0040 /* Can do 10mbps full-duplex */ | ||
698 | #define MII_LPA_100HALF 0x0080 /* Can do 100mbps half-duplex */ | ||
699 | #define MII_LPA_100FULL 0x0100 /* Can do 100mbps full-duplex */ | ||
700 | #define MII_LPA_100BASE4 0x0200 /* 100BASE-T4 */ | ||
701 | #define MII_LPA_PAUSE 0x0400 /* PAUSE */ | ||
702 | #define MII_LPA_ASYPAUSE 0x0800 /* Asymmetrical PAUSE */ | ||
703 | #define MII_LPA_RFAULT 0x2000 /* Link partner faulted */ | ||
704 | #define MII_LPA_LPACK 0x4000 /* Link partner acked us */ | ||
705 | #define MII_LPA_NPAGE 0x8000 /* Next page bit */ | ||
706 | |||
707 | /* Autoneg Advertisement Register */ | 643 | /* Autoneg Advertisement Register */ |
708 | #define MII_AR_SELECTOR_FIELD 0x0001 /* indicates IEEE 802.3 CSMA/CD */ | 644 | #define MII_AR_DEFAULT_CAP_MASK 0 |
709 | #define MII_AR_10T_HD_CAPS 0x0020 /* 10T Half Duplex Capable */ | ||
710 | #define MII_AR_10T_FD_CAPS 0x0040 /* 10T Full Duplex Capable */ | ||
711 | #define MII_AR_100TX_HD_CAPS 0x0080 /* 100TX Half Duplex Capable */ | ||
712 | #define MII_AR_100TX_FD_CAPS 0x0100 /* 100TX Full Duplex Capable */ | ||
713 | #define MII_AR_100T4_CAPS 0x0200 /* 100T4 Capable */ | ||
714 | #define MII_AR_PAUSE 0x0400 /* Pause operation desired */ | ||
715 | #define MII_AR_ASM_DIR 0x0800 /* Asymmetric Pause Direction bit */ | ||
716 | #define MII_AR_REMOTE_FAULT 0x2000 /* Remote Fault detected */ | ||
717 | #define MII_AR_NEXT_PAGE 0x8000 /* Next Page ability supported */ | ||
718 | #define MII_AR_SPEED_MASK 0x01E0 | ||
719 | #define MII_AR_DEFAULT_CAP_MASK 0x0DE0 | ||
720 | 645 | ||
721 | /* 1000BASE-T Control Register */ | 646 | /* 1000BASE-T Control Register */ |
722 | #define MII_AT001_CR_1000T_HD_CAPS 0x0100 /* Advertise 1000T HD capability */ | 647 | #define MII_AT001_CR_1000T_SPEED_MASK \ |
723 | #define MII_AT001_CR_1000T_FD_CAPS 0x0200 /* Advertise 1000T FD capability */ | 648 | (ADVERTISE_1000FULL | ADVERTISE_1000HALF) |
724 | #define MII_AT001_CR_1000T_REPEATER_DTE 0x0400 /* 1=Repeater/switch device port */ | 649 | #define MII_AT001_CR_1000T_DEFAULT_CAP_MASK MII_AT001_CR_1000T_SPEED_MASK |
725 | /* 0=DTE device */ | ||
726 | #define MII_AT001_CR_1000T_MS_VALUE 0x0800 /* 1=Configure PHY as Master */ | ||
727 | /* 0=Configure PHY as Slave */ | ||
728 | #define MII_AT001_CR_1000T_MS_ENABLE 0x1000 /* 1=Master/Slave manual config value */ | ||
729 | /* 0=Automatic Master/Slave config */ | ||
730 | #define MII_AT001_CR_1000T_TEST_MODE_NORMAL 0x0000 /* Normal Operation */ | ||
731 | #define MII_AT001_CR_1000T_TEST_MODE_1 0x2000 /* Transmit Waveform test */ | ||
732 | #define MII_AT001_CR_1000T_TEST_MODE_2 0x4000 /* Master Transmit Jitter test */ | ||
733 | #define MII_AT001_CR_1000T_TEST_MODE_3 0x6000 /* Slave Transmit Jitter test */ | ||
734 | #define MII_AT001_CR_1000T_TEST_MODE_4 0x8000 /* Transmitter Distortion test */ | ||
735 | #define MII_AT001_CR_1000T_SPEED_MASK 0x0300 | ||
736 | #define MII_AT001_CR_1000T_DEFAULT_CAP_MASK 0x0300 | ||
737 | |||
738 | /* 1000BASE-T Status Register */ | ||
739 | #define MII_AT001_SR_1000T_LP_HD_CAPS 0x0400 /* LP is 1000T HD capable */ | ||
740 | #define MII_AT001_SR_1000T_LP_FD_CAPS 0x0800 /* LP is 1000T FD capable */ | ||
741 | #define MII_AT001_SR_1000T_REMOTE_RX_STATUS 0x1000 /* Remote receiver OK */ | ||
742 | #define MII_AT001_SR_1000T_LOCAL_RX_STATUS 0x2000 /* Local receiver OK */ | ||
743 | #define MII_AT001_SR_1000T_MS_CONFIG_RES 0x4000 /* 1=Local TX is Master, 0=Slave */ | ||
744 | #define MII_AT001_SR_1000T_MS_CONFIG_FAULT 0x8000 /* Master/Slave config fault */ | ||
745 | #define MII_AT001_SR_1000T_REMOTE_RX_STATUS_SHIFT 12 | ||
746 | #define MII_AT001_SR_1000T_LOCAL_RX_STATUS_SHIFT 13 | ||
747 | |||
748 | /* Extended Status Register */ | ||
749 | #define MII_AT001_ESR_1000T_HD_CAPS 0x1000 /* 1000T HD capable */ | ||
750 | #define MII_AT001_ESR_1000T_FD_CAPS 0x2000 /* 1000T FD capable */ | ||
751 | #define MII_AT001_ESR_1000X_HD_CAPS 0x4000 /* 1000X HD capable */ | ||
752 | #define MII_AT001_ESR_1000X_FD_CAPS 0x8000 /* 1000X FD capable */ | ||
753 | 650 | ||
754 | /* AT001 PHY Specific Control Register */ | 651 | /* AT001 PHY Specific Control Register */ |
755 | #define MII_AT001_PSCR_JABBER_DISABLE 0x0001 /* 1=Jabber Function disabled */ | 652 | #define MII_AT001_PSCR_JABBER_DISABLE 0x0001 /* 1=Jabber Function disabled */ |
diff --git a/drivers/net/atl1e/atl1e_main.c b/drivers/net/atl1e/atl1e_main.c index e28f8baf394e..bf7500ccd73f 100644 --- a/drivers/net/atl1e/atl1e_main.c +++ b/drivers/net/atl1e/atl1e_main.c | |||
@@ -2051,9 +2051,9 @@ static int atl1e_suspend(struct pci_dev *pdev, pm_message_t state) | |||
2051 | atl1e_read_phy_reg(hw, MII_BMSR, (u16 *)&mii_bmsr_data); | 2051 | atl1e_read_phy_reg(hw, MII_BMSR, (u16 *)&mii_bmsr_data); |
2052 | atl1e_read_phy_reg(hw, MII_BMSR, (u16 *)&mii_bmsr_data); | 2052 | atl1e_read_phy_reg(hw, MII_BMSR, (u16 *)&mii_bmsr_data); |
2053 | 2053 | ||
2054 | mii_advertise_data = MII_AR_10T_HD_CAPS; | 2054 | mii_advertise_data = ADVERTISE_10HALF; |
2055 | 2055 | ||
2056 | if ((atl1e_write_phy_reg(hw, MII_AT001_CR, 0) != 0) || | 2056 | if ((atl1e_write_phy_reg(hw, MII_CTRL1000, 0) != 0) || |
2057 | (atl1e_write_phy_reg(hw, | 2057 | (atl1e_write_phy_reg(hw, |
2058 | MII_ADVERTISE, mii_advertise_data) != 0) || | 2058 | MII_ADVERTISE, mii_advertise_data) != 0) || |
2059 | (atl1e_phy_commit(hw)) != 0) { | 2059 | (atl1e_phy_commit(hw)) != 0) { |
diff --git a/drivers/net/e1000e/e1000.h b/drivers/net/e1000e/e1000.h index e610e1369053..00bf595ebd67 100644 --- a/drivers/net/e1000e/e1000.h +++ b/drivers/net/e1000e/e1000.h | |||
@@ -364,6 +364,7 @@ struct e1000_adapter { | |||
364 | /* structs defined in e1000_hw.h */ | 364 | /* structs defined in e1000_hw.h */ |
365 | struct e1000_hw hw; | 365 | struct e1000_hw hw; |
366 | 366 | ||
367 | spinlock_t stats64_lock; | ||
367 | struct e1000_hw_stats stats; | 368 | struct e1000_hw_stats stats; |
368 | struct e1000_phy_info phy_info; | 369 | struct e1000_phy_info phy_info; |
369 | struct e1000_phy_stats phy_stats; | 370 | struct e1000_phy_stats phy_stats; |
@@ -494,7 +495,9 @@ extern int e1000e_setup_rx_resources(struct e1000_adapter *adapter); | |||
494 | extern int e1000e_setup_tx_resources(struct e1000_adapter *adapter); | 495 | extern int e1000e_setup_tx_resources(struct e1000_adapter *adapter); |
495 | extern void e1000e_free_rx_resources(struct e1000_adapter *adapter); | 496 | extern void e1000e_free_rx_resources(struct e1000_adapter *adapter); |
496 | extern void e1000e_free_tx_resources(struct e1000_adapter *adapter); | 497 | extern void e1000e_free_tx_resources(struct e1000_adapter *adapter); |
497 | extern void e1000e_update_stats(struct e1000_adapter *adapter); | 498 | extern struct rtnl_link_stats64 *e1000e_get_stats64(struct net_device *netdev, |
499 | struct rtnl_link_stats64 | ||
500 | *stats); | ||
498 | extern void e1000e_set_interrupt_capability(struct e1000_adapter *adapter); | 501 | extern void e1000e_set_interrupt_capability(struct e1000_adapter *adapter); |
499 | extern void e1000e_reset_interrupt_capability(struct e1000_adapter *adapter); | 502 | extern void e1000e_reset_interrupt_capability(struct e1000_adapter *adapter); |
500 | extern void e1000e_get_hw_control(struct e1000_adapter *adapter); | 503 | extern void e1000e_get_hw_control(struct e1000_adapter *adapter); |
diff --git a/drivers/net/e1000e/ethtool.c b/drivers/net/e1000e/ethtool.c index fa08b6336cfb..daa7fe4b9fdd 100644 --- a/drivers/net/e1000e/ethtool.c +++ b/drivers/net/e1000e/ethtool.c | |||
@@ -46,15 +46,15 @@ struct e1000_stats { | |||
46 | }; | 46 | }; |
47 | 47 | ||
48 | #define E1000_STAT(str, m) { \ | 48 | #define E1000_STAT(str, m) { \ |
49 | .stat_string = str, \ | 49 | .stat_string = str, \ |
50 | .type = E1000_STATS, \ | 50 | .type = E1000_STATS, \ |
51 | .sizeof_stat = sizeof(((struct e1000_adapter *)0)->m), \ | 51 | .sizeof_stat = sizeof(((struct e1000_adapter *)0)->m), \ |
52 | .stat_offset = offsetof(struct e1000_adapter, m) } | 52 | .stat_offset = offsetof(struct e1000_adapter, m) } |
53 | #define E1000_NETDEV_STAT(str, m) { \ | 53 | #define E1000_NETDEV_STAT(str, m) { \ |
54 | .stat_string = str, \ | 54 | .stat_string = str, \ |
55 | .type = NETDEV_STATS, \ | 55 | .type = NETDEV_STATS, \ |
56 | .sizeof_stat = sizeof(((struct net_device *)0)->m), \ | 56 | .sizeof_stat = sizeof(((struct rtnl_link_stats64 *)0)->m), \ |
57 | .stat_offset = offsetof(struct net_device, m) } | 57 | .stat_offset = offsetof(struct rtnl_link_stats64, m) } |
58 | 58 | ||
59 | static const struct e1000_stats e1000_gstrings_stats[] = { | 59 | static const struct e1000_stats e1000_gstrings_stats[] = { |
60 | E1000_STAT("rx_packets", stats.gprc), | 60 | E1000_STAT("rx_packets", stats.gprc), |
@@ -65,21 +65,21 @@ static const struct e1000_stats e1000_gstrings_stats[] = { | |||
65 | E1000_STAT("tx_broadcast", stats.bptc), | 65 | E1000_STAT("tx_broadcast", stats.bptc), |
66 | E1000_STAT("rx_multicast", stats.mprc), | 66 | E1000_STAT("rx_multicast", stats.mprc), |
67 | E1000_STAT("tx_multicast", stats.mptc), | 67 | E1000_STAT("tx_multicast", stats.mptc), |
68 | E1000_NETDEV_STAT("rx_errors", stats.rx_errors), | 68 | E1000_NETDEV_STAT("rx_errors", rx_errors), |
69 | E1000_NETDEV_STAT("tx_errors", stats.tx_errors), | 69 | E1000_NETDEV_STAT("tx_errors", tx_errors), |
70 | E1000_NETDEV_STAT("tx_dropped", stats.tx_dropped), | 70 | E1000_NETDEV_STAT("tx_dropped", tx_dropped), |
71 | E1000_STAT("multicast", stats.mprc), | 71 | E1000_STAT("multicast", stats.mprc), |
72 | E1000_STAT("collisions", stats.colc), | 72 | E1000_STAT("collisions", stats.colc), |
73 | E1000_NETDEV_STAT("rx_length_errors", stats.rx_length_errors), | 73 | E1000_NETDEV_STAT("rx_length_errors", rx_length_errors), |
74 | E1000_NETDEV_STAT("rx_over_errors", stats.rx_over_errors), | 74 | E1000_NETDEV_STAT("rx_over_errors", rx_over_errors), |
75 | E1000_STAT("rx_crc_errors", stats.crcerrs), | 75 | E1000_STAT("rx_crc_errors", stats.crcerrs), |
76 | E1000_NETDEV_STAT("rx_frame_errors", stats.rx_frame_errors), | 76 | E1000_NETDEV_STAT("rx_frame_errors", rx_frame_errors), |
77 | E1000_STAT("rx_no_buffer_count", stats.rnbc), | 77 | E1000_STAT("rx_no_buffer_count", stats.rnbc), |
78 | E1000_STAT("rx_missed_errors", stats.mpc), | 78 | E1000_STAT("rx_missed_errors", stats.mpc), |
79 | E1000_STAT("tx_aborted_errors", stats.ecol), | 79 | E1000_STAT("tx_aborted_errors", stats.ecol), |
80 | E1000_STAT("tx_carrier_errors", stats.tncrs), | 80 | E1000_STAT("tx_carrier_errors", stats.tncrs), |
81 | E1000_NETDEV_STAT("tx_fifo_errors", stats.tx_fifo_errors), | 81 | E1000_NETDEV_STAT("tx_fifo_errors", tx_fifo_errors), |
82 | E1000_NETDEV_STAT("tx_heartbeat_errors", stats.tx_heartbeat_errors), | 82 | E1000_NETDEV_STAT("tx_heartbeat_errors", tx_heartbeat_errors), |
83 | E1000_STAT("tx_window_errors", stats.latecol), | 83 | E1000_STAT("tx_window_errors", stats.latecol), |
84 | E1000_STAT("tx_abort_late_coll", stats.latecol), | 84 | E1000_STAT("tx_abort_late_coll", stats.latecol), |
85 | E1000_STAT("tx_deferred_ok", stats.dc), | 85 | E1000_STAT("tx_deferred_ok", stats.dc), |
@@ -684,20 +684,13 @@ static int e1000_set_ringparam(struct net_device *netdev, | |||
684 | rx_old = adapter->rx_ring; | 684 | rx_old = adapter->rx_ring; |
685 | 685 | ||
686 | err = -ENOMEM; | 686 | err = -ENOMEM; |
687 | tx_ring = kzalloc(sizeof(struct e1000_ring), GFP_KERNEL); | 687 | tx_ring = kmemdup(tx_old, sizeof(struct e1000_ring), GFP_KERNEL); |
688 | if (!tx_ring) | 688 | if (!tx_ring) |
689 | goto err_alloc_tx; | 689 | goto err_alloc_tx; |
690 | /* | ||
691 | * use a memcpy to save any previously configured | ||
692 | * items like napi structs from having to be | ||
693 | * reinitialized | ||
694 | */ | ||
695 | memcpy(tx_ring, tx_old, sizeof(struct e1000_ring)); | ||
696 | 690 | ||
697 | rx_ring = kzalloc(sizeof(struct e1000_ring), GFP_KERNEL); | 691 | rx_ring = kmemdup(rx_old, sizeof(struct e1000_ring), GFP_KERNEL); |
698 | if (!rx_ring) | 692 | if (!rx_ring) |
699 | goto err_alloc_rx; | 693 | goto err_alloc_rx; |
700 | memcpy(rx_ring, rx_old, sizeof(struct e1000_ring)); | ||
701 | 694 | ||
702 | adapter->tx_ring = tx_ring; | 695 | adapter->tx_ring = tx_ring; |
703 | adapter->rx_ring = rx_ring; | 696 | adapter->rx_ring = rx_ring; |
@@ -1255,7 +1248,6 @@ static int e1000_integrated_phy_loopback(struct e1000_adapter *adapter) | |||
1255 | { | 1248 | { |
1256 | struct e1000_hw *hw = &adapter->hw; | 1249 | struct e1000_hw *hw = &adapter->hw; |
1257 | u32 ctrl_reg = 0; | 1250 | u32 ctrl_reg = 0; |
1258 | u32 stat_reg = 0; | ||
1259 | u16 phy_reg = 0; | 1251 | u16 phy_reg = 0; |
1260 | s32 ret_val = 0; | 1252 | s32 ret_val = 0; |
1261 | 1253 | ||
@@ -1363,8 +1355,7 @@ static int e1000_integrated_phy_loopback(struct e1000_adapter *adapter) | |||
1363 | * Set the ILOS bit on the fiber Nic if half duplex link is | 1355 | * Set the ILOS bit on the fiber Nic if half duplex link is |
1364 | * detected. | 1356 | * detected. |
1365 | */ | 1357 | */ |
1366 | stat_reg = er32(STATUS); | 1358 | if ((er32(STATUS) & E1000_STATUS_FD) == 0) |
1367 | if ((stat_reg & E1000_STATUS_FD) == 0) | ||
1368 | ctrl_reg |= (E1000_CTRL_ILOS | E1000_CTRL_SLU); | 1359 | ctrl_reg |= (E1000_CTRL_ILOS | E1000_CTRL_SLU); |
1369 | } | 1360 | } |
1370 | 1361 | ||
@@ -1982,14 +1973,15 @@ static void e1000_get_ethtool_stats(struct net_device *netdev, | |||
1982 | u64 *data) | 1973 | u64 *data) |
1983 | { | 1974 | { |
1984 | struct e1000_adapter *adapter = netdev_priv(netdev); | 1975 | struct e1000_adapter *adapter = netdev_priv(netdev); |
1976 | struct rtnl_link_stats64 net_stats; | ||
1985 | int i; | 1977 | int i; |
1986 | char *p = NULL; | 1978 | char *p = NULL; |
1987 | 1979 | ||
1988 | e1000e_update_stats(adapter); | 1980 | e1000e_get_stats64(netdev, &net_stats); |
1989 | for (i = 0; i < E1000_GLOBAL_STATS_LEN; i++) { | 1981 | for (i = 0; i < E1000_GLOBAL_STATS_LEN; i++) { |
1990 | switch (e1000_gstrings_stats[i].type) { | 1982 | switch (e1000_gstrings_stats[i].type) { |
1991 | case NETDEV_STATS: | 1983 | case NETDEV_STATS: |
1992 | p = (char *) netdev + | 1984 | p = (char *) &net_stats + |
1993 | e1000_gstrings_stats[i].stat_offset; | 1985 | e1000_gstrings_stats[i].stat_offset; |
1994 | break; | 1986 | break; |
1995 | case E1000_STATS: | 1987 | case E1000_STATS: |
diff --git a/drivers/net/e1000e/ich8lan.c b/drivers/net/e1000e/ich8lan.c index fb46974cfec1..232b42b7f7ce 100644 --- a/drivers/net/e1000e/ich8lan.c +++ b/drivers/net/e1000e/ich8lan.c | |||
@@ -2104,7 +2104,6 @@ static s32 e1000_flash_cycle_init_ich8lan(struct e1000_hw *hw) | |||
2104 | { | 2104 | { |
2105 | union ich8_hws_flash_status hsfsts; | 2105 | union ich8_hws_flash_status hsfsts; |
2106 | s32 ret_val = -E1000_ERR_NVM; | 2106 | s32 ret_val = -E1000_ERR_NVM; |
2107 | s32 i = 0; | ||
2108 | 2107 | ||
2109 | hsfsts.regval = er16flash(ICH_FLASH_HSFSTS); | 2108 | hsfsts.regval = er16flash(ICH_FLASH_HSFSTS); |
2110 | 2109 | ||
@@ -2140,6 +2139,8 @@ static s32 e1000_flash_cycle_init_ich8lan(struct e1000_hw *hw) | |||
2140 | ew16flash(ICH_FLASH_HSFSTS, hsfsts.regval); | 2139 | ew16flash(ICH_FLASH_HSFSTS, hsfsts.regval); |
2141 | ret_val = 0; | 2140 | ret_val = 0; |
2142 | } else { | 2141 | } else { |
2142 | s32 i = 0; | ||
2143 | |||
2143 | /* | 2144 | /* |
2144 | * Otherwise poll for sometime so the current | 2145 | * Otherwise poll for sometime so the current |
2145 | * cycle has a chance to end before giving up. | 2146 | * cycle has a chance to end before giving up. |
diff --git a/drivers/net/e1000e/lib.c b/drivers/net/e1000e/lib.c index 68aa1749bf66..96921de5df2e 100644 --- a/drivers/net/e1000e/lib.c +++ b/drivers/net/e1000e/lib.c | |||
@@ -1978,15 +1978,15 @@ static s32 e1000_ready_nvm_eeprom(struct e1000_hw *hw) | |||
1978 | { | 1978 | { |
1979 | struct e1000_nvm_info *nvm = &hw->nvm; | 1979 | struct e1000_nvm_info *nvm = &hw->nvm; |
1980 | u32 eecd = er32(EECD); | 1980 | u32 eecd = er32(EECD); |
1981 | u16 timeout = 0; | ||
1982 | u8 spi_stat_reg; | 1981 | u8 spi_stat_reg; |
1983 | 1982 | ||
1984 | if (nvm->type == e1000_nvm_eeprom_spi) { | 1983 | if (nvm->type == e1000_nvm_eeprom_spi) { |
1984 | u16 timeout = NVM_MAX_RETRY_SPI; | ||
1985 | |||
1985 | /* Clear SK and CS */ | 1986 | /* Clear SK and CS */ |
1986 | eecd &= ~(E1000_EECD_CS | E1000_EECD_SK); | 1987 | eecd &= ~(E1000_EECD_CS | E1000_EECD_SK); |
1987 | ew32(EECD, eecd); | 1988 | ew32(EECD, eecd); |
1988 | udelay(1); | 1989 | udelay(1); |
1989 | timeout = NVM_MAX_RETRY_SPI; | ||
1990 | 1990 | ||
1991 | /* | 1991 | /* |
1992 | * Read "Status Register" repeatedly until the LSB is cleared. | 1992 | * Read "Status Register" repeatedly until the LSB is cleared. |
diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c index 1c18f26b0812..5b916b01805f 100644 --- a/drivers/net/e1000e/netdev.c +++ b/drivers/net/e1000e/netdev.c | |||
@@ -900,8 +900,6 @@ next_desc: | |||
900 | 900 | ||
901 | adapter->total_rx_bytes += total_rx_bytes; | 901 | adapter->total_rx_bytes += total_rx_bytes; |
902 | adapter->total_rx_packets += total_rx_packets; | 902 | adapter->total_rx_packets += total_rx_packets; |
903 | netdev->stats.rx_bytes += total_rx_bytes; | ||
904 | netdev->stats.rx_packets += total_rx_packets; | ||
905 | return cleaned; | 903 | return cleaned; |
906 | } | 904 | } |
907 | 905 | ||
@@ -1057,8 +1055,6 @@ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter) | |||
1057 | } | 1055 | } |
1058 | adapter->total_tx_bytes += total_tx_bytes; | 1056 | adapter->total_tx_bytes += total_tx_bytes; |
1059 | adapter->total_tx_packets += total_tx_packets; | 1057 | adapter->total_tx_packets += total_tx_packets; |
1060 | netdev->stats.tx_bytes += total_tx_bytes; | ||
1061 | netdev->stats.tx_packets += total_tx_packets; | ||
1062 | return count < tx_ring->count; | 1058 | return count < tx_ring->count; |
1063 | } | 1059 | } |
1064 | 1060 | ||
@@ -1245,8 +1241,6 @@ next_desc: | |||
1245 | 1241 | ||
1246 | adapter->total_rx_bytes += total_rx_bytes; | 1242 | adapter->total_rx_bytes += total_rx_bytes; |
1247 | adapter->total_rx_packets += total_rx_packets; | 1243 | adapter->total_rx_packets += total_rx_packets; |
1248 | netdev->stats.rx_bytes += total_rx_bytes; | ||
1249 | netdev->stats.rx_packets += total_rx_packets; | ||
1250 | return cleaned; | 1244 | return cleaned; |
1251 | } | 1245 | } |
1252 | 1246 | ||
@@ -1426,8 +1420,6 @@ next_desc: | |||
1426 | 1420 | ||
1427 | adapter->total_rx_bytes += total_rx_bytes; | 1421 | adapter->total_rx_bytes += total_rx_bytes; |
1428 | adapter->total_rx_packets += total_rx_packets; | 1422 | adapter->total_rx_packets += total_rx_packets; |
1429 | netdev->stats.rx_bytes += total_rx_bytes; | ||
1430 | netdev->stats.rx_packets += total_rx_packets; | ||
1431 | return cleaned; | 1423 | return cleaned; |
1432 | } | 1424 | } |
1433 | 1425 | ||
@@ -2728,7 +2720,6 @@ static void e1000_setup_rctl(struct e1000_adapter *adapter) | |||
2728 | { | 2720 | { |
2729 | struct e1000_hw *hw = &adapter->hw; | 2721 | struct e1000_hw *hw = &adapter->hw; |
2730 | u32 rctl, rfctl; | 2722 | u32 rctl, rfctl; |
2731 | u32 psrctl = 0; | ||
2732 | u32 pages = 0; | 2723 | u32 pages = 0; |
2733 | 2724 | ||
2734 | /* Workaround Si errata on 82579 - configure jumbo frame flow */ | 2725 | /* Workaround Si errata on 82579 - configure jumbo frame flow */ |
@@ -2827,6 +2818,8 @@ static void e1000_setup_rctl(struct e1000_adapter *adapter) | |||
2827 | adapter->rx_ps_pages = 0; | 2818 | adapter->rx_ps_pages = 0; |
2828 | 2819 | ||
2829 | if (adapter->rx_ps_pages) { | 2820 | if (adapter->rx_ps_pages) { |
2821 | u32 psrctl = 0; | ||
2822 | |||
2830 | /* Configure extra packet-split registers */ | 2823 | /* Configure extra packet-split registers */ |
2831 | rfctl = er32(RFCTL); | 2824 | rfctl = er32(RFCTL); |
2832 | rfctl |= E1000_RFCTL_EXTEN; | 2825 | rfctl |= E1000_RFCTL_EXTEN; |
@@ -3028,7 +3021,6 @@ static void e1000_set_multi(struct net_device *netdev) | |||
3028 | struct netdev_hw_addr *ha; | 3021 | struct netdev_hw_addr *ha; |
3029 | u8 *mta_list; | 3022 | u8 *mta_list; |
3030 | u32 rctl; | 3023 | u32 rctl; |
3031 | int i; | ||
3032 | 3024 | ||
3033 | /* Check for Promiscuous and All Multicast modes */ | 3025 | /* Check for Promiscuous and All Multicast modes */ |
3034 | 3026 | ||
@@ -3051,12 +3043,13 @@ static void e1000_set_multi(struct net_device *netdev) | |||
3051 | ew32(RCTL, rctl); | 3043 | ew32(RCTL, rctl); |
3052 | 3044 | ||
3053 | if (!netdev_mc_empty(netdev)) { | 3045 | if (!netdev_mc_empty(netdev)) { |
3046 | int i = 0; | ||
3047 | |||
3054 | mta_list = kmalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC); | 3048 | mta_list = kmalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC); |
3055 | if (!mta_list) | 3049 | if (!mta_list) |
3056 | return; | 3050 | return; |
3057 | 3051 | ||
3058 | /* prepare a packed array of only addresses. */ | 3052 | /* prepare a packed array of only addresses. */ |
3059 | i = 0; | ||
3060 | netdev_for_each_mc_addr(ha, netdev) | 3053 | netdev_for_each_mc_addr(ha, netdev) |
3061 | memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN); | 3054 | memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN); |
3062 | 3055 | ||
@@ -3338,6 +3331,8 @@ int e1000e_up(struct e1000_adapter *adapter) | |||
3338 | return 0; | 3331 | return 0; |
3339 | } | 3332 | } |
3340 | 3333 | ||
3334 | static void e1000e_update_stats(struct e1000_adapter *adapter); | ||
3335 | |||
3341 | void e1000e_down(struct e1000_adapter *adapter) | 3336 | void e1000e_down(struct e1000_adapter *adapter) |
3342 | { | 3337 | { |
3343 | struct net_device *netdev = adapter->netdev; | 3338 | struct net_device *netdev = adapter->netdev; |
@@ -3372,6 +3367,11 @@ void e1000e_down(struct e1000_adapter *adapter) | |||
3372 | del_timer_sync(&adapter->phy_info_timer); | 3367 | del_timer_sync(&adapter->phy_info_timer); |
3373 | 3368 | ||
3374 | netif_carrier_off(netdev); | 3369 | netif_carrier_off(netdev); |
3370 | |||
3371 | spin_lock(&adapter->stats64_lock); | ||
3372 | e1000e_update_stats(adapter); | ||
3373 | spin_unlock(&adapter->stats64_lock); | ||
3374 | |||
3375 | adapter->link_speed = 0; | 3375 | adapter->link_speed = 0; |
3376 | adapter->link_duplex = 0; | 3376 | adapter->link_duplex = 0; |
3377 | 3377 | ||
@@ -3413,6 +3413,8 @@ static int __devinit e1000_sw_init(struct e1000_adapter *adapter) | |||
3413 | adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN; | 3413 | adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN; |
3414 | adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; | 3414 | adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; |
3415 | 3415 | ||
3416 | spin_lock_init(&adapter->stats64_lock); | ||
3417 | |||
3416 | e1000e_set_interrupt_capability(adapter); | 3418 | e1000e_set_interrupt_capability(adapter); |
3417 | 3419 | ||
3418 | if (e1000_alloc_queues(adapter)) | 3420 | if (e1000_alloc_queues(adapter)) |
@@ -3886,7 +3888,7 @@ release: | |||
3886 | * e1000e_update_stats - Update the board statistics counters | 3888 | * e1000e_update_stats - Update the board statistics counters |
3887 | * @adapter: board private structure | 3889 | * @adapter: board private structure |
3888 | **/ | 3890 | **/ |
3889 | void e1000e_update_stats(struct e1000_adapter *adapter) | 3891 | static void e1000e_update_stats(struct e1000_adapter *adapter) |
3890 | { | 3892 | { |
3891 | struct net_device *netdev = adapter->netdev; | 3893 | struct net_device *netdev = adapter->netdev; |
3892 | struct e1000_hw *hw = &adapter->hw; | 3894 | struct e1000_hw *hw = &adapter->hw; |
@@ -3998,10 +4000,11 @@ static void e1000_phy_read_status(struct e1000_adapter *adapter) | |||
3998 | { | 4000 | { |
3999 | struct e1000_hw *hw = &adapter->hw; | 4001 | struct e1000_hw *hw = &adapter->hw; |
4000 | struct e1000_phy_regs *phy = &adapter->phy_regs; | 4002 | struct e1000_phy_regs *phy = &adapter->phy_regs; |
4001 | int ret_val; | ||
4002 | 4003 | ||
4003 | if ((er32(STATUS) & E1000_STATUS_LU) && | 4004 | if ((er32(STATUS) & E1000_STATUS_LU) && |
4004 | (adapter->hw.phy.media_type == e1000_media_type_copper)) { | 4005 | (adapter->hw.phy.media_type == e1000_media_type_copper)) { |
4006 | int ret_val; | ||
4007 | |||
4005 | ret_val = e1e_rphy(hw, PHY_CONTROL, &phy->bmcr); | 4008 | ret_val = e1e_rphy(hw, PHY_CONTROL, &phy->bmcr); |
4006 | ret_val |= e1e_rphy(hw, PHY_STATUS, &phy->bmsr); | 4009 | ret_val |= e1e_rphy(hw, PHY_STATUS, &phy->bmsr); |
4007 | ret_val |= e1e_rphy(hw, PHY_AUTONEG_ADV, &phy->advertise); | 4010 | ret_val |= e1e_rphy(hw, PHY_AUTONEG_ADV, &phy->advertise); |
@@ -4147,7 +4150,6 @@ static void e1000_watchdog_task(struct work_struct *work) | |||
4147 | struct e1000_ring *tx_ring = adapter->tx_ring; | 4150 | struct e1000_ring *tx_ring = adapter->tx_ring; |
4148 | struct e1000_hw *hw = &adapter->hw; | 4151 | struct e1000_hw *hw = &adapter->hw; |
4149 | u32 link, tctl; | 4152 | u32 link, tctl; |
4150 | int tx_pending = 0; | ||
4151 | 4153 | ||
4152 | link = e1000e_has_link(adapter); | 4154 | link = e1000e_has_link(adapter); |
4153 | if ((netif_carrier_ok(netdev)) && link) { | 4155 | if ((netif_carrier_ok(netdev)) && link) { |
@@ -4285,7 +4287,9 @@ static void e1000_watchdog_task(struct work_struct *work) | |||
4285 | } | 4287 | } |
4286 | 4288 | ||
4287 | link_up: | 4289 | link_up: |
4290 | spin_lock(&adapter->stats64_lock); | ||
4288 | e1000e_update_stats(adapter); | 4291 | e1000e_update_stats(adapter); |
4292 | spin_unlock(&adapter->stats64_lock); | ||
4289 | 4293 | ||
4290 | mac->tx_packet_delta = adapter->stats.tpt - adapter->tpt_old; | 4294 | mac->tx_packet_delta = adapter->stats.tpt - adapter->tpt_old; |
4291 | adapter->tpt_old = adapter->stats.tpt; | 4295 | adapter->tpt_old = adapter->stats.tpt; |
@@ -4299,21 +4303,18 @@ link_up: | |||
4299 | 4303 | ||
4300 | e1000e_update_adaptive(&adapter->hw); | 4304 | e1000e_update_adaptive(&adapter->hw); |
4301 | 4305 | ||
4302 | if (!netif_carrier_ok(netdev)) { | 4306 | if (!netif_carrier_ok(netdev) && |
4303 | tx_pending = (e1000_desc_unused(tx_ring) + 1 < | 4307 | (e1000_desc_unused(tx_ring) + 1 < tx_ring->count)) { |
4304 | tx_ring->count); | 4308 | /* |
4305 | if (tx_pending) { | 4309 | * We've lost link, so the controller stops DMA, |
4306 | /* | 4310 | * but we've got queued Tx work that's never going |
4307 | * We've lost link, so the controller stops DMA, | 4311 | * to get done, so reset controller to flush Tx. |
4308 | * but we've got queued Tx work that's never going | 4312 | * (Do the reset outside of interrupt context). |
4309 | * to get done, so reset controller to flush Tx. | 4313 | */ |
4310 | * (Do the reset outside of interrupt context). | 4314 | adapter->tx_timeout_count++; |
4311 | */ | 4315 | schedule_work(&adapter->reset_task); |
4312 | adapter->tx_timeout_count++; | 4316 | /* return immediately since reset is imminent */ |
4313 | schedule_work(&adapter->reset_task); | 4317 | return; |
4314 | /* return immediately since reset is imminent */ | ||
4315 | return; | ||
4316 | } | ||
4317 | } | 4318 | } |
4318 | 4319 | ||
4319 | /* Simple mode for Interrupt Throttle Rate (ITR) */ | 4320 | /* Simple mode for Interrupt Throttle Rate (ITR) */ |
@@ -4384,13 +4385,13 @@ static int e1000_tso(struct e1000_adapter *adapter, | |||
4384 | u32 cmd_length = 0; | 4385 | u32 cmd_length = 0; |
4385 | u16 ipcse = 0, tucse, mss; | 4386 | u16 ipcse = 0, tucse, mss; |
4386 | u8 ipcss, ipcso, tucss, tucso, hdr_len; | 4387 | u8 ipcss, ipcso, tucss, tucso, hdr_len; |
4387 | int err; | ||
4388 | 4388 | ||
4389 | if (!skb_is_gso(skb)) | 4389 | if (!skb_is_gso(skb)) |
4390 | return 0; | 4390 | return 0; |
4391 | 4391 | ||
4392 | if (skb_header_cloned(skb)) { | 4392 | if (skb_header_cloned(skb)) { |
4393 | err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); | 4393 | int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); |
4394 | |||
4394 | if (err) | 4395 | if (err) |
4395 | return err; | 4396 | return err; |
4396 | } | 4397 | } |
@@ -4897,16 +4898,55 @@ static void e1000_reset_task(struct work_struct *work) | |||
4897 | } | 4898 | } |
4898 | 4899 | ||
4899 | /** | 4900 | /** |
4900 | * e1000_get_stats - Get System Network Statistics | 4901 | * e1000_get_stats64 - Get System Network Statistics |
4901 | * @netdev: network interface device structure | 4902 | * @netdev: network interface device structure |
4903 | * @stats: rtnl_link_stats64 pointer | ||
4902 | * | 4904 | * |
4903 | * Returns the address of the device statistics structure. | 4905 | * Returns the address of the device statistics structure. |
4904 | * The statistics are actually updated from the timer callback. | ||
4905 | **/ | 4906 | **/ |
4906 | static struct net_device_stats *e1000_get_stats(struct net_device *netdev) | 4907 | struct rtnl_link_stats64 *e1000e_get_stats64(struct net_device *netdev, |
4908 | struct rtnl_link_stats64 *stats) | ||
4907 | { | 4909 | { |
4908 | /* only return the current stats */ | 4910 | struct e1000_adapter *adapter = netdev_priv(netdev); |
4909 | return &netdev->stats; | 4911 | |
4912 | memset(stats, 0, sizeof(struct rtnl_link_stats64)); | ||
4913 | spin_lock(&adapter->stats64_lock); | ||
4914 | e1000e_update_stats(adapter); | ||
4915 | /* Fill out the OS statistics structure */ | ||
4916 | stats->rx_bytes = adapter->stats.gorc; | ||
4917 | stats->rx_packets = adapter->stats.gprc; | ||
4918 | stats->tx_bytes = adapter->stats.gotc; | ||
4919 | stats->tx_packets = adapter->stats.gptc; | ||
4920 | stats->multicast = adapter->stats.mprc; | ||
4921 | stats->collisions = adapter->stats.colc; | ||
4922 | |||
4923 | /* Rx Errors */ | ||
4924 | |||
4925 | /* | ||
4926 | * RLEC on some newer hardware can be incorrect so build | ||
4927 | * our own version based on RUC and ROC | ||
4928 | */ | ||
4929 | stats->rx_errors = adapter->stats.rxerrc + | ||
4930 | adapter->stats.crcerrs + adapter->stats.algnerrc + | ||
4931 | adapter->stats.ruc + adapter->stats.roc + | ||
4932 | adapter->stats.cexterr; | ||
4933 | stats->rx_length_errors = adapter->stats.ruc + | ||
4934 | adapter->stats.roc; | ||
4935 | stats->rx_crc_errors = adapter->stats.crcerrs; | ||
4936 | stats->rx_frame_errors = adapter->stats.algnerrc; | ||
4937 | stats->rx_missed_errors = adapter->stats.mpc; | ||
4938 | |||
4939 | /* Tx Errors */ | ||
4940 | stats->tx_errors = adapter->stats.ecol + | ||
4941 | adapter->stats.latecol; | ||
4942 | stats->tx_aborted_errors = adapter->stats.ecol; | ||
4943 | stats->tx_window_errors = adapter->stats.latecol; | ||
4944 | stats->tx_carrier_errors = adapter->stats.tncrs; | ||
4945 | |||
4946 | /* Tx Dropped needs to be maintained elsewhere */ | ||
4947 | |||
4948 | spin_unlock(&adapter->stats64_lock); | ||
4949 | return stats; | ||
4910 | } | 4950 | } |
4911 | 4951 | ||
4912 | /** | 4952 | /** |
@@ -5476,9 +5516,10 @@ static irqreturn_t e1000_intr_msix(int irq, void *data) | |||
5476 | { | 5516 | { |
5477 | struct net_device *netdev = data; | 5517 | struct net_device *netdev = data; |
5478 | struct e1000_adapter *adapter = netdev_priv(netdev); | 5518 | struct e1000_adapter *adapter = netdev_priv(netdev); |
5479 | int vector, msix_irq; | ||
5480 | 5519 | ||
5481 | if (adapter->msix_entries) { | 5520 | if (adapter->msix_entries) { |
5521 | int vector, msix_irq; | ||
5522 | |||
5482 | vector = 0; | 5523 | vector = 0; |
5483 | msix_irq = adapter->msix_entries[vector].vector; | 5524 | msix_irq = adapter->msix_entries[vector].vector; |
5484 | disable_irq(msix_irq); | 5525 | disable_irq(msix_irq); |
@@ -5675,7 +5716,7 @@ static const struct net_device_ops e1000e_netdev_ops = { | |||
5675 | .ndo_open = e1000_open, | 5716 | .ndo_open = e1000_open, |
5676 | .ndo_stop = e1000_close, | 5717 | .ndo_stop = e1000_close, |
5677 | .ndo_start_xmit = e1000_xmit_frame, | 5718 | .ndo_start_xmit = e1000_xmit_frame, |
5678 | .ndo_get_stats = e1000_get_stats, | 5719 | .ndo_get_stats64 = e1000e_get_stats64, |
5679 | .ndo_set_multicast_list = e1000_set_multi, | 5720 | .ndo_set_multicast_list = e1000_set_multi, |
5680 | .ndo_set_mac_address = e1000_set_mac, | 5721 | .ndo_set_mac_address = e1000_set_mac, |
5681 | .ndo_change_mtu = e1000_change_mtu, | 5722 | .ndo_change_mtu = e1000_change_mtu, |
diff --git a/drivers/net/e1000e/phy.c b/drivers/net/e1000e/phy.c index 6bea051b134b..6ae31fcfb629 100644 --- a/drivers/net/e1000e/phy.c +++ b/drivers/net/e1000e/phy.c | |||
@@ -2409,9 +2409,7 @@ static u32 e1000_get_phy_addr_for_bm_page(u32 page, u32 reg) | |||
2409 | s32 e1000e_write_phy_reg_bm(struct e1000_hw *hw, u32 offset, u16 data) | 2409 | s32 e1000e_write_phy_reg_bm(struct e1000_hw *hw, u32 offset, u16 data) |
2410 | { | 2410 | { |
2411 | s32 ret_val; | 2411 | s32 ret_val; |
2412 | u32 page_select = 0; | ||
2413 | u32 page = offset >> IGP_PAGE_SHIFT; | 2412 | u32 page = offset >> IGP_PAGE_SHIFT; |
2414 | u32 page_shift = 0; | ||
2415 | 2413 | ||
2416 | ret_val = hw->phy.ops.acquire(hw); | 2414 | ret_val = hw->phy.ops.acquire(hw); |
2417 | if (ret_val) | 2415 | if (ret_val) |
@@ -2427,6 +2425,8 @@ s32 e1000e_write_phy_reg_bm(struct e1000_hw *hw, u32 offset, u16 data) | |||
2427 | hw->phy.addr = e1000_get_phy_addr_for_bm_page(page, offset); | 2425 | hw->phy.addr = e1000_get_phy_addr_for_bm_page(page, offset); |
2428 | 2426 | ||
2429 | if (offset > MAX_PHY_MULTI_PAGE_REG) { | 2427 | if (offset > MAX_PHY_MULTI_PAGE_REG) { |
2428 | u32 page_shift, page_select; | ||
2429 | |||
2430 | /* | 2430 | /* |
2431 | * Page select is register 31 for phy address 1 and 22 for | 2431 | * Page select is register 31 for phy address 1 and 22 for |
2432 | * phy address 2 and 3. Page select is shifted only for | 2432 | * phy address 2 and 3. Page select is shifted only for |
@@ -2468,9 +2468,7 @@ out: | |||
2468 | s32 e1000e_read_phy_reg_bm(struct e1000_hw *hw, u32 offset, u16 *data) | 2468 | s32 e1000e_read_phy_reg_bm(struct e1000_hw *hw, u32 offset, u16 *data) |
2469 | { | 2469 | { |
2470 | s32 ret_val; | 2470 | s32 ret_val; |
2471 | u32 page_select = 0; | ||
2472 | u32 page = offset >> IGP_PAGE_SHIFT; | 2471 | u32 page = offset >> IGP_PAGE_SHIFT; |
2473 | u32 page_shift = 0; | ||
2474 | 2472 | ||
2475 | ret_val = hw->phy.ops.acquire(hw); | 2473 | ret_val = hw->phy.ops.acquire(hw); |
2476 | if (ret_val) | 2474 | if (ret_val) |
@@ -2486,6 +2484,8 @@ s32 e1000e_read_phy_reg_bm(struct e1000_hw *hw, u32 offset, u16 *data) | |||
2486 | hw->phy.addr = e1000_get_phy_addr_for_bm_page(page, offset); | 2484 | hw->phy.addr = e1000_get_phy_addr_for_bm_page(page, offset); |
2487 | 2485 | ||
2488 | if (offset > MAX_PHY_MULTI_PAGE_REG) { | 2486 | if (offset > MAX_PHY_MULTI_PAGE_REG) { |
2487 | u32 page_shift, page_select; | ||
2488 | |||
2489 | /* | 2489 | /* |
2490 | * Page select is register 31 for phy address 1 and 22 for | 2490 | * Page select is register 31 for phy address 1 and 22 for |
2491 | * phy address 2 and 3. Page select is shifted only for | 2491 | * phy address 2 and 3. Page select is shifted only for |
diff --git a/drivers/net/enic/enic.h b/drivers/net/enic/enic.h index a937f49d9db7..ca3be4f15556 100644 --- a/drivers/net/enic/enic.h +++ b/drivers/net/enic/enic.h | |||
@@ -32,8 +32,8 @@ | |||
32 | 32 | ||
33 | #define DRV_NAME "enic" | 33 | #define DRV_NAME "enic" |
34 | #define DRV_DESCRIPTION "Cisco VIC Ethernet NIC Driver" | 34 | #define DRV_DESCRIPTION "Cisco VIC Ethernet NIC Driver" |
35 | #define DRV_VERSION "1.4.1.10" | 35 | #define DRV_VERSION "2.1.1.2" |
36 | #define DRV_COPYRIGHT "Copyright 2008-2010 Cisco Systems, Inc" | 36 | #define DRV_COPYRIGHT "Copyright 2008-2011 Cisco Systems, Inc" |
37 | 37 | ||
38 | #define ENIC_BARS_MAX 6 | 38 | #define ENIC_BARS_MAX 6 |
39 | 39 | ||
@@ -49,7 +49,7 @@ struct enic_msix_entry { | |||
49 | void *devid; | 49 | void *devid; |
50 | }; | 50 | }; |
51 | 51 | ||
52 | #define ENIC_SET_APPLIED (1 << 0) | 52 | #define ENIC_PORT_REQUEST_APPLIED (1 << 0) |
53 | #define ENIC_SET_REQUEST (1 << 1) | 53 | #define ENIC_SET_REQUEST (1 << 1) |
54 | #define ENIC_SET_NAME (1 << 2) | 54 | #define ENIC_SET_NAME (1 << 2) |
55 | #define ENIC_SET_INSTANCE (1 << 3) | 55 | #define ENIC_SET_INSTANCE (1 << 3) |
diff --git a/drivers/net/enic/enic_main.c b/drivers/net/enic/enic_main.c index a0af48c51fb3..89664c670972 100644 --- a/drivers/net/enic/enic_main.c +++ b/drivers/net/enic/enic_main.c | |||
@@ -1318,18 +1318,20 @@ static int enic_set_port_profile(struct enic *enic, u8 *mac) | |||
1318 | vic_provinfo_free(vp); | 1318 | vic_provinfo_free(vp); |
1319 | if (err) | 1319 | if (err) |
1320 | return err; | 1320 | return err; |
1321 | |||
1322 | enic->pp.set |= ENIC_SET_APPLIED; | ||
1323 | break; | 1321 | break; |
1324 | 1322 | ||
1325 | case PORT_REQUEST_DISASSOCIATE: | 1323 | case PORT_REQUEST_DISASSOCIATE: |
1326 | enic->pp.set &= ~ENIC_SET_APPLIED; | ||
1327 | break; | 1324 | break; |
1328 | 1325 | ||
1329 | default: | 1326 | default: |
1330 | return -EINVAL; | 1327 | return -EINVAL; |
1331 | } | 1328 | } |
1332 | 1329 | ||
1330 | /* Set flag to indicate that the port assoc/disassoc | ||
1331 | * request has been sent out to fw | ||
1332 | */ | ||
1333 | enic->pp.set |= ENIC_PORT_REQUEST_APPLIED; | ||
1334 | |||
1333 | return 0; | 1335 | return 0; |
1334 | } | 1336 | } |
1335 | 1337 | ||
@@ -1411,7 +1413,7 @@ static int enic_get_vf_port(struct net_device *netdev, int vf, | |||
1411 | int err, error, done; | 1413 | int err, error, done; |
1412 | u16 response = PORT_PROFILE_RESPONSE_SUCCESS; | 1414 | u16 response = PORT_PROFILE_RESPONSE_SUCCESS; |
1413 | 1415 | ||
1414 | if (!(enic->pp.set & ENIC_SET_APPLIED)) | 1416 | if (!(enic->pp.set & ENIC_PORT_REQUEST_APPLIED)) |
1415 | return -ENODATA; | 1417 | return -ENODATA; |
1416 | 1418 | ||
1417 | err = enic_dev_init_done(enic, &done, &error); | 1419 | err = enic_dev_init_done(enic, &done, &error); |
diff --git a/drivers/net/igb/e1000_82575.c b/drivers/net/igb/e1000_82575.c index 0a2368fa6bc6..c1552b6f4a68 100644 --- a/drivers/net/igb/e1000_82575.c +++ b/drivers/net/igb/e1000_82575.c | |||
@@ -129,6 +129,7 @@ static s32 igb_get_invariants_82575(struct e1000_hw *hw) | |||
129 | break; | 129 | break; |
130 | case E1000_DEV_ID_82580_COPPER: | 130 | case E1000_DEV_ID_82580_COPPER: |
131 | case E1000_DEV_ID_82580_FIBER: | 131 | case E1000_DEV_ID_82580_FIBER: |
132 | case E1000_DEV_ID_82580_QUAD_FIBER: | ||
132 | case E1000_DEV_ID_82580_SERDES: | 133 | case E1000_DEV_ID_82580_SERDES: |
133 | case E1000_DEV_ID_82580_SGMII: | 134 | case E1000_DEV_ID_82580_SGMII: |
134 | case E1000_DEV_ID_82580_COPPER_DUAL: | 135 | case E1000_DEV_ID_82580_COPPER_DUAL: |
diff --git a/drivers/net/igb/e1000_hw.h b/drivers/net/igb/e1000_hw.h index e2638afb8cdc..281324e85980 100644 --- a/drivers/net/igb/e1000_hw.h +++ b/drivers/net/igb/e1000_hw.h | |||
@@ -54,6 +54,7 @@ struct e1000_hw; | |||
54 | #define E1000_DEV_ID_82580_SERDES 0x1510 | 54 | #define E1000_DEV_ID_82580_SERDES 0x1510 |
55 | #define E1000_DEV_ID_82580_SGMII 0x1511 | 55 | #define E1000_DEV_ID_82580_SGMII 0x1511 |
56 | #define E1000_DEV_ID_82580_COPPER_DUAL 0x1516 | 56 | #define E1000_DEV_ID_82580_COPPER_DUAL 0x1516 |
57 | #define E1000_DEV_ID_82580_QUAD_FIBER 0x1527 | ||
57 | #define E1000_DEV_ID_DH89XXCC_SGMII 0x0438 | 58 | #define E1000_DEV_ID_DH89XXCC_SGMII 0x0438 |
58 | #define E1000_DEV_ID_DH89XXCC_SERDES 0x043A | 59 | #define E1000_DEV_ID_DH89XXCC_SERDES 0x043A |
59 | #define E1000_DEV_ID_DH89XXCC_BACKPLANE 0x043C | 60 | #define E1000_DEV_ID_DH89XXCC_BACKPLANE 0x043C |
diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c index 58c665b7513d..200cc3209672 100644 --- a/drivers/net/igb/igb_main.c +++ b/drivers/net/igb/igb_main.c | |||
@@ -68,6 +68,7 @@ static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = { | |||
68 | { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 }, | 68 | { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 }, |
69 | { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 }, | 69 | { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 }, |
70 | { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 }, | 70 | { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 }, |
71 | { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 }, | ||
71 | { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 }, | 72 | { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 }, |
72 | { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 }, | 73 | { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 }, |
73 | { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 }, | 74 | { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 }, |
diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c index c7a6c4466978..9f6d670748d1 100644 --- a/drivers/net/ppp_generic.c +++ b/drivers/net/ppp_generic.c | |||
@@ -592,8 +592,8 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
592 | ppp_release(NULL, file); | 592 | ppp_release(NULL, file); |
593 | err = 0; | 593 | err = 0; |
594 | } else | 594 | } else |
595 | printk(KERN_DEBUG "PPPIOCDETACH file->f_count=%ld\n", | 595 | pr_warn("PPPIOCDETACH file->f_count=%ld\n", |
596 | atomic_long_read(&file->f_count)); | 596 | atomic_long_read(&file->f_count)); |
597 | mutex_unlock(&ppp_mutex); | 597 | mutex_unlock(&ppp_mutex); |
598 | return err; | 598 | return err; |
599 | } | 599 | } |
@@ -630,7 +630,7 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
630 | 630 | ||
631 | if (pf->kind != INTERFACE) { | 631 | if (pf->kind != INTERFACE) { |
632 | /* can't happen */ | 632 | /* can't happen */ |
633 | printk(KERN_ERR "PPP: not interface or channel??\n"); | 633 | pr_err("PPP: not interface or channel??\n"); |
634 | return -EINVAL; | 634 | return -EINVAL; |
635 | } | 635 | } |
636 | 636 | ||
@@ -704,7 +704,8 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
704 | } | 704 | } |
705 | vj = slhc_init(val2+1, val+1); | 705 | vj = slhc_init(val2+1, val+1); |
706 | if (!vj) { | 706 | if (!vj) { |
707 | printk(KERN_ERR "PPP: no memory (VJ compressor)\n"); | 707 | netdev_err(ppp->dev, |
708 | "PPP: no memory (VJ compressor)\n"); | ||
708 | err = -ENOMEM; | 709 | err = -ENOMEM; |
709 | break; | 710 | break; |
710 | } | 711 | } |
@@ -898,17 +899,17 @@ static int __init ppp_init(void) | |||
898 | { | 899 | { |
899 | int err; | 900 | int err; |
900 | 901 | ||
901 | printk(KERN_INFO "PPP generic driver version " PPP_VERSION "\n"); | 902 | pr_info("PPP generic driver version " PPP_VERSION "\n"); |
902 | 903 | ||
903 | err = register_pernet_device(&ppp_net_ops); | 904 | err = register_pernet_device(&ppp_net_ops); |
904 | if (err) { | 905 | if (err) { |
905 | printk(KERN_ERR "failed to register PPP pernet device (%d)\n", err); | 906 | pr_err("failed to register PPP pernet device (%d)\n", err); |
906 | goto out; | 907 | goto out; |
907 | } | 908 | } |
908 | 909 | ||
909 | err = register_chrdev(PPP_MAJOR, "ppp", &ppp_device_fops); | 910 | err = register_chrdev(PPP_MAJOR, "ppp", &ppp_device_fops); |
910 | if (err) { | 911 | if (err) { |
911 | printk(KERN_ERR "failed to register PPP device (%d)\n", err); | 912 | pr_err("failed to register PPP device (%d)\n", err); |
912 | goto out_net; | 913 | goto out_net; |
913 | } | 914 | } |
914 | 915 | ||
@@ -1078,7 +1079,7 @@ pad_compress_skb(struct ppp *ppp, struct sk_buff *skb) | |||
1078 | new_skb = alloc_skb(new_skb_size, GFP_ATOMIC); | 1079 | new_skb = alloc_skb(new_skb_size, GFP_ATOMIC); |
1079 | if (!new_skb) { | 1080 | if (!new_skb) { |
1080 | if (net_ratelimit()) | 1081 | if (net_ratelimit()) |
1081 | printk(KERN_ERR "PPP: no memory (comp pkt)\n"); | 1082 | netdev_err(ppp->dev, "PPP: no memory (comp pkt)\n"); |
1082 | return NULL; | 1083 | return NULL; |
1083 | } | 1084 | } |
1084 | if (ppp->dev->hard_header_len > PPP_HDRLEN) | 1085 | if (ppp->dev->hard_header_len > PPP_HDRLEN) |
@@ -1108,7 +1109,7 @@ pad_compress_skb(struct ppp *ppp, struct sk_buff *skb) | |||
1108 | * the same number. | 1109 | * the same number. |
1109 | */ | 1110 | */ |
1110 | if (net_ratelimit()) | 1111 | if (net_ratelimit()) |
1111 | printk(KERN_ERR "ppp: compressor dropped pkt\n"); | 1112 | netdev_err(ppp->dev, "ppp: compressor dropped pkt\n"); |
1112 | kfree_skb(skb); | 1113 | kfree_skb(skb); |
1113 | kfree_skb(new_skb); | 1114 | kfree_skb(new_skb); |
1114 | new_skb = NULL; | 1115 | new_skb = NULL; |
@@ -1138,7 +1139,9 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb) | |||
1138 | if (ppp->pass_filter && | 1139 | if (ppp->pass_filter && |
1139 | sk_run_filter(skb, ppp->pass_filter) == 0) { | 1140 | sk_run_filter(skb, ppp->pass_filter) == 0) { |
1140 | if (ppp->debug & 1) | 1141 | if (ppp->debug & 1) |
1141 | printk(KERN_DEBUG "PPP: outbound frame not passed\n"); | 1142 | netdev_printk(KERN_DEBUG, ppp->dev, |
1143 | "PPP: outbound frame " | ||
1144 | "not passed\n"); | ||
1142 | kfree_skb(skb); | 1145 | kfree_skb(skb); |
1143 | return; | 1146 | return; |
1144 | } | 1147 | } |
@@ -1164,7 +1167,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb) | |||
1164 | new_skb = alloc_skb(skb->len + ppp->dev->hard_header_len - 2, | 1167 | new_skb = alloc_skb(skb->len + ppp->dev->hard_header_len - 2, |
1165 | GFP_ATOMIC); | 1168 | GFP_ATOMIC); |
1166 | if (!new_skb) { | 1169 | if (!new_skb) { |
1167 | printk(KERN_ERR "PPP: no memory (VJ comp pkt)\n"); | 1170 | netdev_err(ppp->dev, "PPP: no memory (VJ comp pkt)\n"); |
1168 | goto drop; | 1171 | goto drop; |
1169 | } | 1172 | } |
1170 | skb_reserve(new_skb, ppp->dev->hard_header_len - 2); | 1173 | skb_reserve(new_skb, ppp->dev->hard_header_len - 2); |
@@ -1202,7 +1205,9 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb) | |||
1202 | proto != PPP_LCP && proto != PPP_CCP) { | 1205 | proto != PPP_LCP && proto != PPP_CCP) { |
1203 | if (!(ppp->flags & SC_CCP_UP) && (ppp->flags & SC_MUST_COMP)) { | 1206 | if (!(ppp->flags & SC_CCP_UP) && (ppp->flags & SC_MUST_COMP)) { |
1204 | if (net_ratelimit()) | 1207 | if (net_ratelimit()) |
1205 | printk(KERN_ERR "ppp: compression required but down - pkt dropped.\n"); | 1208 | netdev_err(ppp->dev, |
1209 | "ppp: compression required but " | ||
1210 | "down - pkt dropped.\n"); | ||
1206 | goto drop; | 1211 | goto drop; |
1207 | } | 1212 | } |
1208 | skb = pad_compress_skb(ppp, skb); | 1213 | skb = pad_compress_skb(ppp, skb); |
@@ -1505,7 +1510,7 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb) | |||
1505 | noskb: | 1510 | noskb: |
1506 | spin_unlock_bh(&pch->downl); | 1511 | spin_unlock_bh(&pch->downl); |
1507 | if (ppp->debug & 1) | 1512 | if (ppp->debug & 1) |
1508 | printk(KERN_ERR "PPP: no memory (fragment)\n"); | 1513 | netdev_err(ppp->dev, "PPP: no memory (fragment)\n"); |
1509 | ++ppp->dev->stats.tx_errors; | 1514 | ++ppp->dev->stats.tx_errors; |
1510 | ++ppp->nxseq; | 1515 | ++ppp->nxseq; |
1511 | return 1; /* abandon the frame */ | 1516 | return 1; /* abandon the frame */ |
@@ -1686,7 +1691,8 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb) | |||
1686 | /* copy to a new sk_buff with more tailroom */ | 1691 | /* copy to a new sk_buff with more tailroom */ |
1687 | ns = dev_alloc_skb(skb->len + 128); | 1692 | ns = dev_alloc_skb(skb->len + 128); |
1688 | if (!ns) { | 1693 | if (!ns) { |
1689 | printk(KERN_ERR"PPP: no memory (VJ decomp)\n"); | 1694 | netdev_err(ppp->dev, "PPP: no memory " |
1695 | "(VJ decomp)\n"); | ||
1690 | goto err; | 1696 | goto err; |
1691 | } | 1697 | } |
1692 | skb_reserve(ns, 2); | 1698 | skb_reserve(ns, 2); |
@@ -1699,7 +1705,8 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb) | |||
1699 | 1705 | ||
1700 | len = slhc_uncompress(ppp->vj, skb->data + 2, skb->len - 2); | 1706 | len = slhc_uncompress(ppp->vj, skb->data + 2, skb->len - 2); |
1701 | if (len <= 0) { | 1707 | if (len <= 0) { |
1702 | printk(KERN_DEBUG "PPP: VJ decompression error\n"); | 1708 | netdev_printk(KERN_DEBUG, ppp->dev, |
1709 | "PPP: VJ decompression error\n"); | ||
1703 | goto err; | 1710 | goto err; |
1704 | } | 1711 | } |
1705 | len += 2; | 1712 | len += 2; |
@@ -1721,7 +1728,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb) | |||
1721 | goto err; | 1728 | goto err; |
1722 | 1729 | ||
1723 | if (slhc_remember(ppp->vj, skb->data + 2, skb->len - 2) <= 0) { | 1730 | if (slhc_remember(ppp->vj, skb->data + 2, skb->len - 2) <= 0) { |
1724 | printk(KERN_ERR "PPP: VJ uncompressed error\n"); | 1731 | netdev_err(ppp->dev, "PPP: VJ uncompressed error\n"); |
1725 | goto err; | 1732 | goto err; |
1726 | } | 1733 | } |
1727 | proto = PPP_IP; | 1734 | proto = PPP_IP; |
@@ -1762,8 +1769,9 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb) | |||
1762 | if (ppp->pass_filter && | 1769 | if (ppp->pass_filter && |
1763 | sk_run_filter(skb, ppp->pass_filter) == 0) { | 1770 | sk_run_filter(skb, ppp->pass_filter) == 0) { |
1764 | if (ppp->debug & 1) | 1771 | if (ppp->debug & 1) |
1765 | printk(KERN_DEBUG "PPP: inbound frame " | 1772 | netdev_printk(KERN_DEBUG, ppp->dev, |
1766 | "not passed\n"); | 1773 | "PPP: inbound frame " |
1774 | "not passed\n"); | ||
1767 | kfree_skb(skb); | 1775 | kfree_skb(skb); |
1768 | return; | 1776 | return; |
1769 | } | 1777 | } |
@@ -1821,7 +1829,8 @@ ppp_decompress_frame(struct ppp *ppp, struct sk_buff *skb) | |||
1821 | 1829 | ||
1822 | ns = dev_alloc_skb(obuff_size); | 1830 | ns = dev_alloc_skb(obuff_size); |
1823 | if (!ns) { | 1831 | if (!ns) { |
1824 | printk(KERN_ERR "ppp_decompress_frame: no memory\n"); | 1832 | netdev_err(ppp->dev, "ppp_decompress_frame: " |
1833 | "no memory\n"); | ||
1825 | goto err; | 1834 | goto err; |
1826 | } | 1835 | } |
1827 | /* the decompressor still expects the A/C bytes in the hdr */ | 1836 | /* the decompressor still expects the A/C bytes in the hdr */ |
@@ -1989,7 +1998,7 @@ ppp_mp_reconstruct(struct ppp *ppp) | |||
1989 | u32 seq = ppp->nextseq; | 1998 | u32 seq = ppp->nextseq; |
1990 | u32 minseq = ppp->minseq; | 1999 | u32 minseq = ppp->minseq; |
1991 | struct sk_buff_head *list = &ppp->mrq; | 2000 | struct sk_buff_head *list = &ppp->mrq; |
1992 | struct sk_buff *p, *next; | 2001 | struct sk_buff *p, *tmp; |
1993 | struct sk_buff *head, *tail; | 2002 | struct sk_buff *head, *tail; |
1994 | struct sk_buff *skb = NULL; | 2003 | struct sk_buff *skb = NULL; |
1995 | int lost = 0, len = 0; | 2004 | int lost = 0, len = 0; |
@@ -1998,13 +2007,15 @@ ppp_mp_reconstruct(struct ppp *ppp) | |||
1998 | return NULL; | 2007 | return NULL; |
1999 | head = list->next; | 2008 | head = list->next; |
2000 | tail = NULL; | 2009 | tail = NULL; |
2001 | for (p = head; p != (struct sk_buff *) list; p = next) { | 2010 | skb_queue_walk_safe(list, p, tmp) { |
2002 | next = p->next; | 2011 | again: |
2003 | if (seq_before(PPP_MP_CB(p)->sequence, seq)) { | 2012 | if (seq_before(PPP_MP_CB(p)->sequence, seq)) { |
2004 | /* this can't happen, anyway ignore the skb */ | 2013 | /* this can't happen, anyway ignore the skb */ |
2005 | printk(KERN_ERR "ppp_mp_reconstruct bad seq %u < %u\n", | 2014 | netdev_err(ppp->dev, "ppp_mp_reconstruct bad " |
2006 | PPP_MP_CB(p)->sequence, seq); | 2015 | "seq %u < %u\n", |
2007 | head = next; | 2016 | PPP_MP_CB(p)->sequence, seq); |
2017 | __skb_unlink(p, list); | ||
2018 | kfree_skb(p); | ||
2008 | continue; | 2019 | continue; |
2009 | } | 2020 | } |
2010 | if (PPP_MP_CB(p)->sequence != seq) { | 2021 | if (PPP_MP_CB(p)->sequence != seq) { |
@@ -2016,8 +2027,7 @@ ppp_mp_reconstruct(struct ppp *ppp) | |||
2016 | lost = 1; | 2027 | lost = 1; |
2017 | seq = seq_before(minseq, PPP_MP_CB(p)->sequence)? | 2028 | seq = seq_before(minseq, PPP_MP_CB(p)->sequence)? |
2018 | minseq + 1: PPP_MP_CB(p)->sequence; | 2029 | minseq + 1: PPP_MP_CB(p)->sequence; |
2019 | next = p; | 2030 | goto again; |
2020 | continue; | ||
2021 | } | 2031 | } |
2022 | 2032 | ||
2023 | /* | 2033 | /* |
@@ -2042,17 +2052,9 @@ ppp_mp_reconstruct(struct ppp *ppp) | |||
2042 | (PPP_MP_CB(head)->BEbits & B)) { | 2052 | (PPP_MP_CB(head)->BEbits & B)) { |
2043 | if (len > ppp->mrru + 2) { | 2053 | if (len > ppp->mrru + 2) { |
2044 | ++ppp->dev->stats.rx_length_errors; | 2054 | ++ppp->dev->stats.rx_length_errors; |
2045 | printk(KERN_DEBUG "PPP: reconstructed packet" | 2055 | netdev_printk(KERN_DEBUG, ppp->dev, |
2046 | " is too long (%d)\n", len); | 2056 | "PPP: reconstructed packet" |
2047 | } else if (p == head) { | 2057 | " is too long (%d)\n", len); |
2048 | /* fragment is complete packet - reuse skb */ | ||
2049 | tail = p; | ||
2050 | skb = skb_get(p); | ||
2051 | break; | ||
2052 | } else if ((skb = dev_alloc_skb(len)) == NULL) { | ||
2053 | ++ppp->dev->stats.rx_missed_errors; | ||
2054 | printk(KERN_DEBUG "PPP: no memory for " | ||
2055 | "reconstructed packet"); | ||
2056 | } else { | 2058 | } else { |
2057 | tail = p; | 2059 | tail = p; |
2058 | break; | 2060 | break; |
@@ -2065,9 +2067,17 @@ ppp_mp_reconstruct(struct ppp *ppp) | |||
2065 | * and we haven't found a complete valid packet yet, | 2067 | * and we haven't found a complete valid packet yet, |
2066 | * we can discard up to and including this fragment. | 2068 | * we can discard up to and including this fragment. |
2067 | */ | 2069 | */ |
2068 | if (PPP_MP_CB(p)->BEbits & E) | 2070 | if (PPP_MP_CB(p)->BEbits & E) { |
2069 | head = next; | 2071 | struct sk_buff *tmp2; |
2070 | 2072 | ||
2073 | skb_queue_reverse_walk_from_safe(list, p, tmp2) { | ||
2074 | __skb_unlink(p, list); | ||
2075 | kfree_skb(p); | ||
2076 | } | ||
2077 | head = skb_peek(list); | ||
2078 | if (!head) | ||
2079 | break; | ||
2080 | } | ||
2071 | ++seq; | 2081 | ++seq; |
2072 | } | 2082 | } |
2073 | 2083 | ||
@@ -2077,26 +2087,37 @@ ppp_mp_reconstruct(struct ppp *ppp) | |||
2077 | signal a receive error. */ | 2087 | signal a receive error. */ |
2078 | if (PPP_MP_CB(head)->sequence != ppp->nextseq) { | 2088 | if (PPP_MP_CB(head)->sequence != ppp->nextseq) { |
2079 | if (ppp->debug & 1) | 2089 | if (ppp->debug & 1) |
2080 | printk(KERN_DEBUG " missed pkts %u..%u\n", | 2090 | netdev_printk(KERN_DEBUG, ppp->dev, |
2081 | ppp->nextseq, | 2091 | " missed pkts %u..%u\n", |
2082 | PPP_MP_CB(head)->sequence-1); | 2092 | ppp->nextseq, |
2093 | PPP_MP_CB(head)->sequence-1); | ||
2083 | ++ppp->dev->stats.rx_dropped; | 2094 | ++ppp->dev->stats.rx_dropped; |
2084 | ppp_receive_error(ppp); | 2095 | ppp_receive_error(ppp); |
2085 | } | 2096 | } |
2086 | 2097 | ||
2087 | if (head != tail) | 2098 | skb = head; |
2088 | /* copy to a single skb */ | 2099 | if (head != tail) { |
2089 | for (p = head; p != tail->next; p = p->next) | 2100 | struct sk_buff **fragpp = &skb_shinfo(skb)->frag_list; |
2090 | skb_copy_bits(p, 0, skb_put(skb, p->len), p->len); | 2101 | p = skb_queue_next(list, head); |
2091 | ppp->nextseq = PPP_MP_CB(tail)->sequence + 1; | 2102 | __skb_unlink(skb, list); |
2092 | head = tail->next; | 2103 | skb_queue_walk_from_safe(list, p, tmp) { |
2093 | } | 2104 | __skb_unlink(p, list); |
2105 | *fragpp = p; | ||
2106 | p->next = NULL; | ||
2107 | fragpp = &p->next; | ||
2108 | |||
2109 | skb->len += p->len; | ||
2110 | skb->data_len += p->len; | ||
2111 | skb->truesize += p->len; | ||
2112 | |||
2113 | if (p == tail) | ||
2114 | break; | ||
2115 | } | ||
2116 | } else { | ||
2117 | __skb_unlink(skb, list); | ||
2118 | } | ||
2094 | 2119 | ||
2095 | /* Discard all the skbuffs that we have copied the data out of | 2120 | ppp->nextseq = PPP_MP_CB(tail)->sequence + 1; |
2096 | or that we can't use. */ | ||
2097 | while ((p = list->next) != head) { | ||
2098 | __skb_unlink(p, list); | ||
2099 | kfree_skb(p); | ||
2100 | } | 2121 | } |
2101 | 2122 | ||
2102 | return skb; | 2123 | return skb; |
@@ -2617,8 +2638,8 @@ ppp_create_interface(struct net *net, int unit, int *retp) | |||
2617 | ret = register_netdev(dev); | 2638 | ret = register_netdev(dev); |
2618 | if (ret != 0) { | 2639 | if (ret != 0) { |
2619 | unit_put(&pn->units_idr, unit); | 2640 | unit_put(&pn->units_idr, unit); |
2620 | printk(KERN_ERR "PPP: couldn't register device %s (%d)\n", | 2641 | netdev_err(ppp->dev, "PPP: couldn't register device %s (%d)\n", |
2621 | dev->name, ret); | 2642 | dev->name, ret); |
2622 | goto out2; | 2643 | goto out2; |
2623 | } | 2644 | } |
2624 | 2645 | ||
@@ -2690,9 +2711,9 @@ static void ppp_destroy_interface(struct ppp *ppp) | |||
2690 | 2711 | ||
2691 | if (!ppp->file.dead || ppp->n_channels) { | 2712 | if (!ppp->file.dead || ppp->n_channels) { |
2692 | /* "can't happen" */ | 2713 | /* "can't happen" */ |
2693 | printk(KERN_ERR "ppp: destroying ppp struct %p but dead=%d " | 2714 | netdev_err(ppp->dev, "ppp: destroying ppp struct %p " |
2694 | "n_channels=%d !\n", ppp, ppp->file.dead, | 2715 | "but dead=%d n_channels=%d !\n", |
2695 | ppp->n_channels); | 2716 | ppp, ppp->file.dead, ppp->n_channels); |
2696 | return; | 2717 | return; |
2697 | } | 2718 | } |
2698 | 2719 | ||
@@ -2834,8 +2855,7 @@ static void ppp_destroy_channel(struct channel *pch) | |||
2834 | 2855 | ||
2835 | if (!pch->file.dead) { | 2856 | if (!pch->file.dead) { |
2836 | /* "can't happen" */ | 2857 | /* "can't happen" */ |
2837 | printk(KERN_ERR "ppp: destroying undead channel %p !\n", | 2858 | pr_err("ppp: destroying undead channel %p !\n", pch); |
2838 | pch); | ||
2839 | return; | 2859 | return; |
2840 | } | 2860 | } |
2841 | skb_queue_purge(&pch->file.xq); | 2861 | skb_queue_purge(&pch->file.xq); |
@@ -2847,7 +2867,7 @@ static void __exit ppp_cleanup(void) | |||
2847 | { | 2867 | { |
2848 | /* should never happen */ | 2868 | /* should never happen */ |
2849 | if (atomic_read(&ppp_unit_count) || atomic_read(&channel_count)) | 2869 | if (atomic_read(&ppp_unit_count) || atomic_read(&channel_count)) |
2850 | printk(KERN_ERR "PPP: removing module but units remain!\n"); | 2870 | pr_err("PPP: removing module but units remain!\n"); |
2851 | unregister_chrdev(PPP_MAJOR, "ppp"); | 2871 | unregister_chrdev(PPP_MAJOR, "ppp"); |
2852 | device_destroy(ppp_class, MKDEV(PPP_MAJOR, 0)); | 2872 | device_destroy(ppp_class, MKDEV(PPP_MAJOR, 0)); |
2853 | class_destroy(ppp_class); | 2873 | class_destroy(ppp_class); |
@@ -2865,7 +2885,7 @@ static int __unit_alloc(struct idr *p, void *ptr, int n) | |||
2865 | 2885 | ||
2866 | again: | 2886 | again: |
2867 | if (!idr_pre_get(p, GFP_KERNEL)) { | 2887 | if (!idr_pre_get(p, GFP_KERNEL)) { |
2868 | printk(KERN_ERR "PPP: No free memory for idr\n"); | 2888 | pr_err("PPP: No free memory for idr\n"); |
2869 | return -ENOMEM; | 2889 | return -ENOMEM; |
2870 | } | 2890 | } |
2871 | 2891 | ||
diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c index 09cac704fdd7..0d6fec6b7d93 100644 --- a/drivers/net/via-velocity.c +++ b/drivers/net/via-velocity.c | |||
@@ -2923,6 +2923,7 @@ static u16 wol_calc_crc(int size, u8 *pattern, u8 *mask_pattern) | |||
2923 | static int velocity_set_wol(struct velocity_info *vptr) | 2923 | static int velocity_set_wol(struct velocity_info *vptr) |
2924 | { | 2924 | { |
2925 | struct mac_regs __iomem *regs = vptr->mac_regs; | 2925 | struct mac_regs __iomem *regs = vptr->mac_regs; |
2926 | enum speed_opt spd_dpx = vptr->options.spd_dpx; | ||
2926 | static u8 buf[256]; | 2927 | static u8 buf[256]; |
2927 | int i; | 2928 | int i; |
2928 | 2929 | ||
@@ -2968,6 +2969,12 @@ static int velocity_set_wol(struct velocity_info *vptr) | |||
2968 | 2969 | ||
2969 | writew(0x0FFF, &regs->WOLSRClr); | 2970 | writew(0x0FFF, &regs->WOLSRClr); |
2970 | 2971 | ||
2972 | if (spd_dpx == SPD_DPX_1000_FULL) | ||
2973 | goto mac_done; | ||
2974 | |||
2975 | if (spd_dpx != SPD_DPX_AUTO) | ||
2976 | goto advertise_done; | ||
2977 | |||
2971 | if (vptr->mii_status & VELOCITY_AUTONEG_ENABLE) { | 2978 | if (vptr->mii_status & VELOCITY_AUTONEG_ENABLE) { |
2972 | if (PHYID_GET_PHY_ID(vptr->phy_id) == PHYID_CICADA_CS8201) | 2979 | if (PHYID_GET_PHY_ID(vptr->phy_id) == PHYID_CICADA_CS8201) |
2973 | MII_REG_BITS_ON(AUXCR_MDPPS, MII_NCONFIG, vptr->mac_regs); | 2980 | MII_REG_BITS_ON(AUXCR_MDPPS, MII_NCONFIG, vptr->mac_regs); |
@@ -2978,6 +2985,7 @@ static int velocity_set_wol(struct velocity_info *vptr) | |||
2978 | if (vptr->mii_status & VELOCITY_SPEED_1000) | 2985 | if (vptr->mii_status & VELOCITY_SPEED_1000) |
2979 | MII_REG_BITS_ON(BMCR_ANRESTART, MII_BMCR, vptr->mac_regs); | 2986 | MII_REG_BITS_ON(BMCR_ANRESTART, MII_BMCR, vptr->mac_regs); |
2980 | 2987 | ||
2988 | advertise_done: | ||
2981 | BYTE_REG_BITS_ON(CHIPGCR_FCMODE, &regs->CHIPGCR); | 2989 | BYTE_REG_BITS_ON(CHIPGCR_FCMODE, &regs->CHIPGCR); |
2982 | 2990 | ||
2983 | { | 2991 | { |
@@ -2987,6 +2995,7 @@ static int velocity_set_wol(struct velocity_info *vptr) | |||
2987 | writeb(GCR, &regs->CHIPGCR); | 2995 | writeb(GCR, &regs->CHIPGCR); |
2988 | } | 2996 | } |
2989 | 2997 | ||
2998 | mac_done: | ||
2990 | BYTE_REG_BITS_OFF(ISR_PWEI, &regs->ISR); | 2999 | BYTE_REG_BITS_OFF(ISR_PWEI, &regs->ISR); |
2991 | /* Turn on SWPTAG just before entering power mode */ | 3000 | /* Turn on SWPTAG just before entering power mode */ |
2992 | BYTE_REG_BITS_ON(STICKHW_SWPTAG, &regs->STICKHW); | 3001 | BYTE_REG_BITS_ON(STICKHW_SWPTAG, &regs->STICKHW); |
diff --git a/drivers/net/via-velocity.h b/drivers/net/via-velocity.h index aa2e69b9ff61..d7227539484e 100644 --- a/drivers/net/via-velocity.h +++ b/drivers/net/via-velocity.h | |||
@@ -361,7 +361,7 @@ enum velocity_owner { | |||
361 | #define MAC_REG_CHIPGSR 0x9C | 361 | #define MAC_REG_CHIPGSR 0x9C |
362 | #define MAC_REG_TESTCFG 0x9D | 362 | #define MAC_REG_TESTCFG 0x9D |
363 | #define MAC_REG_DEBUG 0x9E | 363 | #define MAC_REG_DEBUG 0x9E |
364 | #define MAC_REG_CHIPGCR 0x9F | 364 | #define MAC_REG_CHIPGCR 0x9F /* Chip Operation and Diagnostic Control */ |
365 | #define MAC_REG_WOLCR0_SET 0xA0 | 365 | #define MAC_REG_WOLCR0_SET 0xA0 |
366 | #define MAC_REG_WOLCR1_SET 0xA1 | 366 | #define MAC_REG_WOLCR1_SET 0xA1 |
367 | #define MAC_REG_PWCFG_SET 0xA2 | 367 | #define MAC_REG_PWCFG_SET 0xA2 |
@@ -848,10 +848,10 @@ enum velocity_owner { | |||
848 | * Bits in CHIPGCR register | 848 | * Bits in CHIPGCR register |
849 | */ | 849 | */ |
850 | 850 | ||
851 | #define CHIPGCR_FCGMII 0x80 /* enable GMII mode */ | 851 | #define CHIPGCR_FCGMII 0x80 /* force GMII (else MII only) */ |
852 | #define CHIPGCR_FCFDX 0x40 | 852 | #define CHIPGCR_FCFDX 0x40 /* force full duplex */ |
853 | #define CHIPGCR_FCRESV 0x20 | 853 | #define CHIPGCR_FCRESV 0x20 |
854 | #define CHIPGCR_FCMODE 0x10 | 854 | #define CHIPGCR_FCMODE 0x10 /* enable MAC forced mode */ |
855 | #define CHIPGCR_LPSOPT 0x08 | 855 | #define CHIPGCR_LPSOPT 0x08 |
856 | #define CHIPGCR_TM1US 0x04 | 856 | #define CHIPGCR_TM1US 0x04 |
857 | #define CHIPGCR_TM0US 0x02 | 857 | #define CHIPGCR_TM0US 0x02 |
diff --git a/drivers/net/vxge/vxge-config.c b/drivers/net/vxge/vxge-config.c index 01c05f53e2f9..77097e383cf4 100644 --- a/drivers/net/vxge/vxge-config.c +++ b/drivers/net/vxge/vxge-config.c | |||
@@ -387,8 +387,8 @@ vxge_hw_vpath_eprom_img_ver_get(struct __vxge_hw_device *hldev, | |||
387 | data1 = steer_ctrl = 0; | 387 | data1 = steer_ctrl = 0; |
388 | 388 | ||
389 | status = vxge_hw_vpath_fw_api(vpath, | 389 | status = vxge_hw_vpath_fw_api(vpath, |
390 | VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL_FW_MEMO, | ||
391 | VXGE_HW_FW_API_GET_EPROM_REV, | 390 | VXGE_HW_FW_API_GET_EPROM_REV, |
391 | VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL_FW_MEMO, | ||
392 | 0, &data0, &data1, &steer_ctrl); | 392 | 0, &data0, &data1, &steer_ctrl); |
393 | if (status != VXGE_HW_OK) | 393 | if (status != VXGE_HW_OK) |
394 | break; | 394 | break; |
@@ -2868,6 +2868,8 @@ __vxge_hw_ring_create(struct __vxge_hw_vpath_handle *vp, | |||
2868 | ring->rxd_init = attr->rxd_init; | 2868 | ring->rxd_init = attr->rxd_init; |
2869 | ring->rxd_term = attr->rxd_term; | 2869 | ring->rxd_term = attr->rxd_term; |
2870 | ring->buffer_mode = config->buffer_mode; | 2870 | ring->buffer_mode = config->buffer_mode; |
2871 | ring->tim_rti_cfg1_saved = vp->vpath->tim_rti_cfg1_saved; | ||
2872 | ring->tim_rti_cfg3_saved = vp->vpath->tim_rti_cfg3_saved; | ||
2871 | ring->rxds_limit = config->rxds_limit; | 2873 | ring->rxds_limit = config->rxds_limit; |
2872 | 2874 | ||
2873 | ring->rxd_size = vxge_hw_ring_rxd_size_get(config->buffer_mode); | 2875 | ring->rxd_size = vxge_hw_ring_rxd_size_get(config->buffer_mode); |
@@ -3511,6 +3513,8 @@ __vxge_hw_fifo_create(struct __vxge_hw_vpath_handle *vp, | |||
3511 | 3513 | ||
3512 | /* apply "interrupts per txdl" attribute */ | 3514 | /* apply "interrupts per txdl" attribute */ |
3513 | fifo->interrupt_type = VXGE_HW_FIFO_TXD_INT_TYPE_UTILZ; | 3515 | fifo->interrupt_type = VXGE_HW_FIFO_TXD_INT_TYPE_UTILZ; |
3516 | fifo->tim_tti_cfg1_saved = vpath->tim_tti_cfg1_saved; | ||
3517 | fifo->tim_tti_cfg3_saved = vpath->tim_tti_cfg3_saved; | ||
3514 | 3518 | ||
3515 | if (fifo->config->intr) | 3519 | if (fifo->config->intr) |
3516 | fifo->interrupt_type = VXGE_HW_FIFO_TXD_INT_TYPE_PER_LIST; | 3520 | fifo->interrupt_type = VXGE_HW_FIFO_TXD_INT_TYPE_PER_LIST; |
@@ -4377,6 +4381,8 @@ __vxge_hw_vpath_tim_configure(struct __vxge_hw_device *hldev, u32 vp_id) | |||
4377 | } | 4381 | } |
4378 | 4382 | ||
4379 | writeq(val64, &vp_reg->tim_cfg1_int_num[VXGE_HW_VPATH_INTR_TX]); | 4383 | writeq(val64, &vp_reg->tim_cfg1_int_num[VXGE_HW_VPATH_INTR_TX]); |
4384 | vpath->tim_tti_cfg1_saved = val64; | ||
4385 | |||
4380 | val64 = readq(&vp_reg->tim_cfg2_int_num[VXGE_HW_VPATH_INTR_TX]); | 4386 | val64 = readq(&vp_reg->tim_cfg2_int_num[VXGE_HW_VPATH_INTR_TX]); |
4381 | 4387 | ||
4382 | if (config->tti.uec_a != VXGE_HW_USE_FLASH_DEFAULT) { | 4388 | if (config->tti.uec_a != VXGE_HW_USE_FLASH_DEFAULT) { |
@@ -4433,6 +4439,7 @@ __vxge_hw_vpath_tim_configure(struct __vxge_hw_device *hldev, u32 vp_id) | |||
4433 | } | 4439 | } |
4434 | 4440 | ||
4435 | writeq(val64, &vp_reg->tim_cfg3_int_num[VXGE_HW_VPATH_INTR_TX]); | 4441 | writeq(val64, &vp_reg->tim_cfg3_int_num[VXGE_HW_VPATH_INTR_TX]); |
4442 | vpath->tim_tti_cfg3_saved = val64; | ||
4436 | } | 4443 | } |
4437 | 4444 | ||
4438 | if (config->ring.enable == VXGE_HW_RING_ENABLE) { | 4445 | if (config->ring.enable == VXGE_HW_RING_ENABLE) { |
@@ -4481,6 +4488,8 @@ __vxge_hw_vpath_tim_configure(struct __vxge_hw_device *hldev, u32 vp_id) | |||
4481 | } | 4488 | } |
4482 | 4489 | ||
4483 | writeq(val64, &vp_reg->tim_cfg1_int_num[VXGE_HW_VPATH_INTR_RX]); | 4490 | writeq(val64, &vp_reg->tim_cfg1_int_num[VXGE_HW_VPATH_INTR_RX]); |
4491 | vpath->tim_rti_cfg1_saved = val64; | ||
4492 | |||
4484 | val64 = readq(&vp_reg->tim_cfg2_int_num[VXGE_HW_VPATH_INTR_RX]); | 4493 | val64 = readq(&vp_reg->tim_cfg2_int_num[VXGE_HW_VPATH_INTR_RX]); |
4485 | 4494 | ||
4486 | if (config->rti.uec_a != VXGE_HW_USE_FLASH_DEFAULT) { | 4495 | if (config->rti.uec_a != VXGE_HW_USE_FLASH_DEFAULT) { |
@@ -4537,6 +4546,7 @@ __vxge_hw_vpath_tim_configure(struct __vxge_hw_device *hldev, u32 vp_id) | |||
4537 | } | 4546 | } |
4538 | 4547 | ||
4539 | writeq(val64, &vp_reg->tim_cfg3_int_num[VXGE_HW_VPATH_INTR_RX]); | 4548 | writeq(val64, &vp_reg->tim_cfg3_int_num[VXGE_HW_VPATH_INTR_RX]); |
4549 | vpath->tim_rti_cfg3_saved = val64; | ||
4540 | } | 4550 | } |
4541 | 4551 | ||
4542 | val64 = 0; | 4552 | val64 = 0; |
@@ -4555,26 +4565,6 @@ __vxge_hw_vpath_tim_configure(struct __vxge_hw_device *hldev, u32 vp_id) | |||
4555 | return status; | 4565 | return status; |
4556 | } | 4566 | } |
4557 | 4567 | ||
4558 | void vxge_hw_vpath_tti_ci_set(struct __vxge_hw_device *hldev, u32 vp_id) | ||
4559 | { | ||
4560 | struct __vxge_hw_virtualpath *vpath; | ||
4561 | struct vxge_hw_vpath_reg __iomem *vp_reg; | ||
4562 | struct vxge_hw_vp_config *config; | ||
4563 | u64 val64; | ||
4564 | |||
4565 | vpath = &hldev->virtual_paths[vp_id]; | ||
4566 | vp_reg = vpath->vp_reg; | ||
4567 | config = vpath->vp_config; | ||
4568 | |||
4569 | if (config->fifo.enable == VXGE_HW_FIFO_ENABLE && | ||
4570 | config->tti.timer_ci_en != VXGE_HW_TIM_TIMER_CI_ENABLE) { | ||
4571 | config->tti.timer_ci_en = VXGE_HW_TIM_TIMER_CI_ENABLE; | ||
4572 | val64 = readq(&vp_reg->tim_cfg1_int_num[VXGE_HW_VPATH_INTR_TX]); | ||
4573 | val64 |= VXGE_HW_TIM_CFG1_INT_NUM_TIMER_CI; | ||
4574 | writeq(val64, &vp_reg->tim_cfg1_int_num[VXGE_HW_VPATH_INTR_TX]); | ||
4575 | } | ||
4576 | } | ||
4577 | |||
4578 | /* | 4568 | /* |
4579 | * __vxge_hw_vpath_initialize | 4569 | * __vxge_hw_vpath_initialize |
4580 | * This routine is the final phase of init which initializes the | 4570 | * This routine is the final phase of init which initializes the |
diff --git a/drivers/net/vxge/vxge-config.h b/drivers/net/vxge/vxge-config.h index e249e288d160..3c53aa732c9d 100644 --- a/drivers/net/vxge/vxge-config.h +++ b/drivers/net/vxge/vxge-config.h | |||
@@ -682,6 +682,10 @@ struct __vxge_hw_virtualpath { | |||
682 | u32 vsport_number; | 682 | u32 vsport_number; |
683 | u32 max_kdfc_db; | 683 | u32 max_kdfc_db; |
684 | u32 max_nofl_db; | 684 | u32 max_nofl_db; |
685 | u64 tim_tti_cfg1_saved; | ||
686 | u64 tim_tti_cfg3_saved; | ||
687 | u64 tim_rti_cfg1_saved; | ||
688 | u64 tim_rti_cfg3_saved; | ||
685 | 689 | ||
686 | struct __vxge_hw_ring *____cacheline_aligned ringh; | 690 | struct __vxge_hw_ring *____cacheline_aligned ringh; |
687 | struct __vxge_hw_fifo *____cacheline_aligned fifoh; | 691 | struct __vxge_hw_fifo *____cacheline_aligned fifoh; |
@@ -921,6 +925,9 @@ struct __vxge_hw_ring { | |||
921 | u32 doorbell_cnt; | 925 | u32 doorbell_cnt; |
922 | u32 total_db_cnt; | 926 | u32 total_db_cnt; |
923 | u64 rxds_limit; | 927 | u64 rxds_limit; |
928 | u32 rtimer; | ||
929 | u64 tim_rti_cfg1_saved; | ||
930 | u64 tim_rti_cfg3_saved; | ||
924 | 931 | ||
925 | enum vxge_hw_status (*callback)( | 932 | enum vxge_hw_status (*callback)( |
926 | struct __vxge_hw_ring *ringh, | 933 | struct __vxge_hw_ring *ringh, |
@@ -1000,6 +1007,9 @@ struct __vxge_hw_fifo { | |||
1000 | u32 per_txdl_space; | 1007 | u32 per_txdl_space; |
1001 | u32 vp_id; | 1008 | u32 vp_id; |
1002 | u32 tx_intr_num; | 1009 | u32 tx_intr_num; |
1010 | u32 rtimer; | ||
1011 | u64 tim_tti_cfg1_saved; | ||
1012 | u64 tim_tti_cfg3_saved; | ||
1003 | 1013 | ||
1004 | enum vxge_hw_status (*callback)( | 1014 | enum vxge_hw_status (*callback)( |
1005 | struct __vxge_hw_fifo *fifo_handle, | 1015 | struct __vxge_hw_fifo *fifo_handle, |
diff --git a/drivers/net/vxge/vxge-main.c b/drivers/net/vxge/vxge-main.c index c81a6512c683..e40f619b62b1 100644 --- a/drivers/net/vxge/vxge-main.c +++ b/drivers/net/vxge/vxge-main.c | |||
@@ -371,9 +371,6 @@ vxge_rx_1b_compl(struct __vxge_hw_ring *ringh, void *dtr, | |||
371 | struct vxge_hw_ring_rxd_info ext_info; | 371 | struct vxge_hw_ring_rxd_info ext_info; |
372 | vxge_debug_entryexit(VXGE_TRACE, "%s: %s:%d", | 372 | vxge_debug_entryexit(VXGE_TRACE, "%s: %s:%d", |
373 | ring->ndev->name, __func__, __LINE__); | 373 | ring->ndev->name, __func__, __LINE__); |
374 | ring->pkts_processed = 0; | ||
375 | |||
376 | vxge_hw_ring_replenish(ringh); | ||
377 | 374 | ||
378 | do { | 375 | do { |
379 | prefetch((char *)dtr + L1_CACHE_BYTES); | 376 | prefetch((char *)dtr + L1_CACHE_BYTES); |
@@ -1588,6 +1585,36 @@ static int vxge_reset_vpath(struct vxgedev *vdev, int vp_id) | |||
1588 | return ret; | 1585 | return ret; |
1589 | } | 1586 | } |
1590 | 1587 | ||
1588 | /* Configure CI */ | ||
1589 | static void vxge_config_ci_for_tti_rti(struct vxgedev *vdev) | ||
1590 | { | ||
1591 | int i = 0; | ||
1592 | |||
1593 | /* Enable CI for RTI */ | ||
1594 | if (vdev->config.intr_type == MSI_X) { | ||
1595 | for (i = 0; i < vdev->no_of_vpath; i++) { | ||
1596 | struct __vxge_hw_ring *hw_ring; | ||
1597 | |||
1598 | hw_ring = vdev->vpaths[i].ring.handle; | ||
1599 | vxge_hw_vpath_dynamic_rti_ci_set(hw_ring); | ||
1600 | } | ||
1601 | } | ||
1602 | |||
1603 | /* Enable CI for TTI */ | ||
1604 | for (i = 0; i < vdev->no_of_vpath; i++) { | ||
1605 | struct __vxge_hw_fifo *hw_fifo = vdev->vpaths[i].fifo.handle; | ||
1606 | vxge_hw_vpath_tti_ci_set(hw_fifo); | ||
1607 | /* | ||
1608 | * For Inta (with or without napi), Set CI ON for only one | ||
1609 | * vpath. (Have only one free running timer). | ||
1610 | */ | ||
1611 | if ((vdev->config.intr_type == INTA) && (i == 0)) | ||
1612 | break; | ||
1613 | } | ||
1614 | |||
1615 | return; | ||
1616 | } | ||
1617 | |||
1591 | static int do_vxge_reset(struct vxgedev *vdev, int event) | 1618 | static int do_vxge_reset(struct vxgedev *vdev, int event) |
1592 | { | 1619 | { |
1593 | enum vxge_hw_status status; | 1620 | enum vxge_hw_status status; |
@@ -1753,6 +1780,9 @@ static int do_vxge_reset(struct vxgedev *vdev, int event) | |||
1753 | netif_tx_wake_all_queues(vdev->ndev); | 1780 | netif_tx_wake_all_queues(vdev->ndev); |
1754 | } | 1781 | } |
1755 | 1782 | ||
1783 | /* configure CI */ | ||
1784 | vxge_config_ci_for_tti_rti(vdev); | ||
1785 | |||
1756 | out: | 1786 | out: |
1757 | vxge_debug_entryexit(VXGE_TRACE, | 1787 | vxge_debug_entryexit(VXGE_TRACE, |
1758 | "%s:%d Exiting...", __func__, __LINE__); | 1788 | "%s:%d Exiting...", __func__, __LINE__); |
@@ -1793,22 +1823,29 @@ static void vxge_reset(struct work_struct *work) | |||
1793 | */ | 1823 | */ |
1794 | static int vxge_poll_msix(struct napi_struct *napi, int budget) | 1824 | static int vxge_poll_msix(struct napi_struct *napi, int budget) |
1795 | { | 1825 | { |
1796 | struct vxge_ring *ring = | 1826 | struct vxge_ring *ring = container_of(napi, struct vxge_ring, napi); |
1797 | container_of(napi, struct vxge_ring, napi); | 1827 | int pkts_processed; |
1798 | int budget_org = budget; | 1828 | int budget_org = budget; |
1799 | ring->budget = budget; | ||
1800 | 1829 | ||
1830 | ring->budget = budget; | ||
1831 | ring->pkts_processed = 0; | ||
1801 | vxge_hw_vpath_poll_rx(ring->handle); | 1832 | vxge_hw_vpath_poll_rx(ring->handle); |
1833 | pkts_processed = ring->pkts_processed; | ||
1802 | 1834 | ||
1803 | if (ring->pkts_processed < budget_org) { | 1835 | if (ring->pkts_processed < budget_org) { |
1804 | napi_complete(napi); | 1836 | napi_complete(napi); |
1837 | |||
1805 | /* Re enable the Rx interrupts for the vpath */ | 1838 | /* Re enable the Rx interrupts for the vpath */ |
1806 | vxge_hw_channel_msix_unmask( | 1839 | vxge_hw_channel_msix_unmask( |
1807 | (struct __vxge_hw_channel *)ring->handle, | 1840 | (struct __vxge_hw_channel *)ring->handle, |
1808 | ring->rx_vector_no); | 1841 | ring->rx_vector_no); |
1842 | mmiowb(); | ||
1809 | } | 1843 | } |
1810 | 1844 | ||
1811 | return ring->pkts_processed; | 1845 | /* We copy and return the local variable in case the interrupt fires |
1846 | * right away after the msix interrupt is cleared above, which can | ||
1847 | * preempt this NAPI thread */ | ||
1848 | return pkts_processed; | ||
1812 | } | 1849 | } |
1813 | 1850 | ||
1814 | static int vxge_poll_inta(struct napi_struct *napi, int budget) | 1851 | static int vxge_poll_inta(struct napi_struct *napi, int budget) |
@@ -1824,6 +1861,7 @@ static int vxge_poll_inta(struct napi_struct *napi, int budget) | |||
1824 | for (i = 0; i < vdev->no_of_vpath; i++) { | 1861 | for (i = 0; i < vdev->no_of_vpath; i++) { |
1825 | ring = &vdev->vpaths[i].ring; | 1862 | ring = &vdev->vpaths[i].ring; |
1826 | ring->budget = budget; | 1863 | ring->budget = budget; |
1864 | ring->pkts_processed = 0; | ||
1827 | vxge_hw_vpath_poll_rx(ring->handle); | 1865 | vxge_hw_vpath_poll_rx(ring->handle); |
1828 | pkts_processed += ring->pkts_processed; | 1866 | pkts_processed += ring->pkts_processed; |
1829 | budget -= ring->pkts_processed; | 1867 | budget -= ring->pkts_processed; |
@@ -2054,6 +2092,7 @@ static int vxge_open_vpaths(struct vxgedev *vdev) | |||
2054 | netdev_get_tx_queue(vdev->ndev, 0); | 2092 | netdev_get_tx_queue(vdev->ndev, 0); |
2055 | vpath->fifo.indicate_max_pkts = | 2093 | vpath->fifo.indicate_max_pkts = |
2056 | vdev->config.fifo_indicate_max_pkts; | 2094 | vdev->config.fifo_indicate_max_pkts; |
2095 | vpath->fifo.tx_vector_no = 0; | ||
2057 | vpath->ring.rx_vector_no = 0; | 2096 | vpath->ring.rx_vector_no = 0; |
2058 | vpath->ring.rx_csum = vdev->rx_csum; | 2097 | vpath->ring.rx_csum = vdev->rx_csum; |
2059 | vpath->ring.rx_hwts = vdev->rx_hwts; | 2098 | vpath->ring.rx_hwts = vdev->rx_hwts; |
@@ -2079,6 +2118,61 @@ static int vxge_open_vpaths(struct vxgedev *vdev) | |||
2079 | return VXGE_HW_OK; | 2118 | return VXGE_HW_OK; |
2080 | } | 2119 | } |
2081 | 2120 | ||
2121 | /** | ||
2122 | * adaptive_coalesce_tx_interrupts - Changes the interrupt coalescing | ||
2123 | * if the interrupts are not within a range | ||
2124 | * @fifo: pointer to transmit fifo structure | ||
2125 | * Description: The function changes the boundary timer and restriction | ||
2126 | * timer values depending on the traffic | ||
2127 | * Return Value: None | ||
2128 | */ | ||
2129 | static void adaptive_coalesce_tx_interrupts(struct vxge_fifo *fifo) | ||
2130 | { | ||
2131 | fifo->interrupt_count++; | ||
2132 | if (jiffies > fifo->jiffies + HZ / 100) { | ||
2133 | struct __vxge_hw_fifo *hw_fifo = fifo->handle; | ||
2134 | |||
2135 | fifo->jiffies = jiffies; | ||
2136 | if (fifo->interrupt_count > VXGE_T1A_MAX_TX_INTERRUPT_COUNT && | ||
2137 | hw_fifo->rtimer != VXGE_TTI_RTIMER_ADAPT_VAL) { | ||
2138 | hw_fifo->rtimer = VXGE_TTI_RTIMER_ADAPT_VAL; | ||
2139 | vxge_hw_vpath_dynamic_tti_rtimer_set(hw_fifo); | ||
2140 | } else if (hw_fifo->rtimer != 0) { | ||
2141 | hw_fifo->rtimer = 0; | ||
2142 | vxge_hw_vpath_dynamic_tti_rtimer_set(hw_fifo); | ||
2143 | } | ||
2144 | fifo->interrupt_count = 0; | ||
2145 | } | ||
2146 | } | ||
2147 | |||
2148 | /** | ||
2149 | * adaptive_coalesce_rx_interrupts - Changes the interrupt coalescing | ||
2150 | * if the interrupts are not within a range | ||
2151 | * @ring: pointer to receive ring structure | ||
2152 | * Description: The function increases or decreases the packet counts within | ||
2153 | * the ranges of traffic utilization, if the interrupts due to this ring are | ||
2154 | * not within a fixed range. | ||
2155 | * Return Value: Nothing | ||
2156 | */ | ||
2157 | static void adaptive_coalesce_rx_interrupts(struct vxge_ring *ring) | ||
2158 | { | ||
2159 | ring->interrupt_count++; | ||
2160 | if (jiffies > ring->jiffies + HZ / 100) { | ||
2161 | struct __vxge_hw_ring *hw_ring = ring->handle; | ||
2162 | |||
2163 | ring->jiffies = jiffies; | ||
2164 | if (ring->interrupt_count > VXGE_T1A_MAX_INTERRUPT_COUNT && | ||
2165 | hw_ring->rtimer != VXGE_RTI_RTIMER_ADAPT_VAL) { | ||
2166 | hw_ring->rtimer = VXGE_RTI_RTIMER_ADAPT_VAL; | ||
2167 | vxge_hw_vpath_dynamic_rti_rtimer_set(hw_ring); | ||
2168 | } else if (hw_ring->rtimer != 0) { | ||
2169 | hw_ring->rtimer = 0; | ||
2170 | vxge_hw_vpath_dynamic_rti_rtimer_set(hw_ring); | ||
2171 | } | ||
2172 | ring->interrupt_count = 0; | ||
2173 | } | ||
2174 | } | ||
2175 | |||
2082 | /* | 2176 | /* |
2083 | * vxge_isr_napi | 2177 | * vxge_isr_napi |
2084 | * @irq: the irq of the device. | 2178 | * @irq: the irq of the device. |
@@ -2139,24 +2233,39 @@ static irqreturn_t vxge_isr_napi(int irq, void *dev_id) | |||
2139 | 2233 | ||
2140 | #ifdef CONFIG_PCI_MSI | 2234 | #ifdef CONFIG_PCI_MSI |
2141 | 2235 | ||
2142 | static irqreturn_t | 2236 | static irqreturn_t vxge_tx_msix_handle(int irq, void *dev_id) |
2143 | vxge_tx_msix_handle(int irq, void *dev_id) | ||
2144 | { | 2237 | { |
2145 | struct vxge_fifo *fifo = (struct vxge_fifo *)dev_id; | 2238 | struct vxge_fifo *fifo = (struct vxge_fifo *)dev_id; |
2146 | 2239 | ||
2240 | adaptive_coalesce_tx_interrupts(fifo); | ||
2241 | |||
2242 | vxge_hw_channel_msix_mask((struct __vxge_hw_channel *)fifo->handle, | ||
2243 | fifo->tx_vector_no); | ||
2244 | |||
2245 | vxge_hw_channel_msix_clear((struct __vxge_hw_channel *)fifo->handle, | ||
2246 | fifo->tx_vector_no); | ||
2247 | |||
2147 | VXGE_COMPLETE_VPATH_TX(fifo); | 2248 | VXGE_COMPLETE_VPATH_TX(fifo); |
2148 | 2249 | ||
2250 | vxge_hw_channel_msix_unmask((struct __vxge_hw_channel *)fifo->handle, | ||
2251 | fifo->tx_vector_no); | ||
2252 | |||
2253 | mmiowb(); | ||
2254 | |||
2149 | return IRQ_HANDLED; | 2255 | return IRQ_HANDLED; |
2150 | } | 2256 | } |
2151 | 2257 | ||
2152 | static irqreturn_t | 2258 | static irqreturn_t vxge_rx_msix_napi_handle(int irq, void *dev_id) |
2153 | vxge_rx_msix_napi_handle(int irq, void *dev_id) | ||
2154 | { | 2259 | { |
2155 | struct vxge_ring *ring = (struct vxge_ring *)dev_id; | 2260 | struct vxge_ring *ring = (struct vxge_ring *)dev_id; |
2156 | 2261 | ||
2157 | /* MSIX_IDX for Rx is 1 */ | 2262 | adaptive_coalesce_rx_interrupts(ring); |
2263 | |||
2158 | vxge_hw_channel_msix_mask((struct __vxge_hw_channel *)ring->handle, | 2264 | vxge_hw_channel_msix_mask((struct __vxge_hw_channel *)ring->handle, |
2159 | ring->rx_vector_no); | 2265 | ring->rx_vector_no); |
2266 | |||
2267 | vxge_hw_channel_msix_clear((struct __vxge_hw_channel *)ring->handle, | ||
2268 | ring->rx_vector_no); | ||
2160 | 2269 | ||
2161 | napi_schedule(&ring->napi); | 2270 | napi_schedule(&ring->napi); |
2162 | return IRQ_HANDLED; | 2271 | return IRQ_HANDLED; |
@@ -2173,14 +2282,20 @@ vxge_alarm_msix_handle(int irq, void *dev_id) | |||
2173 | VXGE_HW_VPATH_MSIX_ACTIVE) + VXGE_ALARM_MSIX_ID; | 2282 | VXGE_HW_VPATH_MSIX_ACTIVE) + VXGE_ALARM_MSIX_ID; |
2174 | 2283 | ||
2175 | for (i = 0; i < vdev->no_of_vpath; i++) { | 2284 | for (i = 0; i < vdev->no_of_vpath; i++) { |
2285 | /* Reduce the chance of losing alarm interrupts by masking | ||
2286 | * the vector. A pending bit will be set if an alarm is | ||
2287 | * generated and on unmask the interrupt will be fired. | ||
2288 | */ | ||
2176 | vxge_hw_vpath_msix_mask(vdev->vpaths[i].handle, msix_id); | 2289 | vxge_hw_vpath_msix_mask(vdev->vpaths[i].handle, msix_id); |
2290 | vxge_hw_vpath_msix_clear(vdev->vpaths[i].handle, msix_id); | ||
2291 | mmiowb(); | ||
2177 | 2292 | ||
2178 | status = vxge_hw_vpath_alarm_process(vdev->vpaths[i].handle, | 2293 | status = vxge_hw_vpath_alarm_process(vdev->vpaths[i].handle, |
2179 | vdev->exec_mode); | 2294 | vdev->exec_mode); |
2180 | if (status == VXGE_HW_OK) { | 2295 | if (status == VXGE_HW_OK) { |
2181 | |||
2182 | vxge_hw_vpath_msix_unmask(vdev->vpaths[i].handle, | 2296 | vxge_hw_vpath_msix_unmask(vdev->vpaths[i].handle, |
2183 | msix_id); | 2297 | msix_id); |
2298 | mmiowb(); | ||
2184 | continue; | 2299 | continue; |
2185 | } | 2300 | } |
2186 | vxge_debug_intr(VXGE_ERR, | 2301 | vxge_debug_intr(VXGE_ERR, |
@@ -2299,6 +2414,9 @@ static int vxge_enable_msix(struct vxgedev *vdev) | |||
2299 | vpath->ring.rx_vector_no = (vpath->device_id * | 2414 | vpath->ring.rx_vector_no = (vpath->device_id * |
2300 | VXGE_HW_VPATH_MSIX_ACTIVE) + 1; | 2415 | VXGE_HW_VPATH_MSIX_ACTIVE) + 1; |
2301 | 2416 | ||
2417 | vpath->fifo.tx_vector_no = (vpath->device_id * | ||
2418 | VXGE_HW_VPATH_MSIX_ACTIVE); | ||
2419 | |||
2302 | vxge_hw_vpath_msix_set(vpath->handle, tim_msix_id, | 2420 | vxge_hw_vpath_msix_set(vpath->handle, tim_msix_id, |
2303 | VXGE_ALARM_MSIX_ID); | 2421 | VXGE_ALARM_MSIX_ID); |
2304 | } | 2422 | } |
@@ -2474,8 +2592,9 @@ INTA_MODE: | |||
2474 | "%s:vxge:INTA", vdev->ndev->name); | 2592 | "%s:vxge:INTA", vdev->ndev->name); |
2475 | vxge_hw_device_set_intr_type(vdev->devh, | 2593 | vxge_hw_device_set_intr_type(vdev->devh, |
2476 | VXGE_HW_INTR_MODE_IRQLINE); | 2594 | VXGE_HW_INTR_MODE_IRQLINE); |
2477 | vxge_hw_vpath_tti_ci_set(vdev->devh, | 2595 | |
2478 | vdev->vpaths[0].device_id); | 2596 | vxge_hw_vpath_tti_ci_set(vdev->vpaths[0].fifo.handle); |
2597 | |||
2479 | ret = request_irq((int) vdev->pdev->irq, | 2598 | ret = request_irq((int) vdev->pdev->irq, |
2480 | vxge_isr_napi, | 2599 | vxge_isr_napi, |
2481 | IRQF_SHARED, vdev->desc[0], vdev); | 2600 | IRQF_SHARED, vdev->desc[0], vdev); |
@@ -2745,6 +2864,10 @@ static int vxge_open(struct net_device *dev) | |||
2745 | } | 2864 | } |
2746 | 2865 | ||
2747 | netif_tx_start_all_queues(vdev->ndev); | 2866 | netif_tx_start_all_queues(vdev->ndev); |
2867 | |||
2868 | /* configure CI */ | ||
2869 | vxge_config_ci_for_tti_rti(vdev); | ||
2870 | |||
2748 | goto out0; | 2871 | goto out0; |
2749 | 2872 | ||
2750 | out2: | 2873 | out2: |
@@ -3348,7 +3471,7 @@ static int __devinit vxge_device_register(struct __vxge_hw_device *hldev, | |||
3348 | vxge_debug_init(VXGE_ERR, | 3471 | vxge_debug_init(VXGE_ERR, |
3349 | "%s: vpath memory allocation failed", | 3472 | "%s: vpath memory allocation failed", |
3350 | vdev->ndev->name); | 3473 | vdev->ndev->name); |
3351 | ret = -ENODEV; | 3474 | ret = -ENOMEM; |
3352 | goto _out1; | 3475 | goto _out1; |
3353 | } | 3476 | } |
3354 | 3477 | ||
@@ -3369,11 +3492,11 @@ static int __devinit vxge_device_register(struct __vxge_hw_device *hldev, | |||
3369 | if (vdev->config.gro_enable) | 3492 | if (vdev->config.gro_enable) |
3370 | ndev->features |= NETIF_F_GRO; | 3493 | ndev->features |= NETIF_F_GRO; |
3371 | 3494 | ||
3372 | if (register_netdev(ndev)) { | 3495 | ret = register_netdev(ndev); |
3496 | if (ret) { | ||
3373 | vxge_debug_init(vxge_hw_device_trace_level_get(hldev), | 3497 | vxge_debug_init(vxge_hw_device_trace_level_get(hldev), |
3374 | "%s: %s : device registration failed!", | 3498 | "%s: %s : device registration failed!", |
3375 | ndev->name, __func__); | 3499 | ndev->name, __func__); |
3376 | ret = -ENODEV; | ||
3377 | goto _out2; | 3500 | goto _out2; |
3378 | } | 3501 | } |
3379 | 3502 | ||
@@ -3444,6 +3567,11 @@ static void vxge_device_unregister(struct __vxge_hw_device *hldev) | |||
3444 | /* in 2.6 will call stop() if device is up */ | 3567 | /* in 2.6 will call stop() if device is up */ |
3445 | unregister_netdev(dev); | 3568 | unregister_netdev(dev); |
3446 | 3569 | ||
3570 | kfree(vdev->vpaths); | ||
3571 | |||
3572 | /* we are safe to free it now */ | ||
3573 | free_netdev(dev); | ||
3574 | |||
3447 | vxge_debug_init(vdev->level_trace, "%s: ethernet device unregistered", | 3575 | vxge_debug_init(vdev->level_trace, "%s: ethernet device unregistered", |
3448 | buf); | 3576 | buf); |
3449 | vxge_debug_entryexit(vdev->level_trace, "%s: %s:%d Exiting...", buf, | 3577 | vxge_debug_entryexit(vdev->level_trace, "%s: %s:%d Exiting...", buf, |
@@ -3799,7 +3927,7 @@ static void __devinit vxge_device_config_init( | |||
3799 | break; | 3927 | break; |
3800 | 3928 | ||
3801 | case MSI_X: | 3929 | case MSI_X: |
3802 | device_config->intr_mode = VXGE_HW_INTR_MODE_MSIX; | 3930 | device_config->intr_mode = VXGE_HW_INTR_MODE_MSIX_ONE_SHOT; |
3803 | break; | 3931 | break; |
3804 | } | 3932 | } |
3805 | 3933 | ||
@@ -4335,10 +4463,10 @@ vxge_probe(struct pci_dev *pdev, const struct pci_device_id *pre) | |||
4335 | goto _exit1; | 4463 | goto _exit1; |
4336 | } | 4464 | } |
4337 | 4465 | ||
4338 | if (pci_request_region(pdev, 0, VXGE_DRIVER_NAME)) { | 4466 | ret = pci_request_region(pdev, 0, VXGE_DRIVER_NAME); |
4467 | if (ret) { | ||
4339 | vxge_debug_init(VXGE_ERR, | 4468 | vxge_debug_init(VXGE_ERR, |
4340 | "%s : request regions failed", __func__); | 4469 | "%s : request regions failed", __func__); |
4341 | ret = -ENODEV; | ||
4342 | goto _exit1; | 4470 | goto _exit1; |
4343 | } | 4471 | } |
4344 | 4472 | ||
@@ -4446,7 +4574,7 @@ vxge_probe(struct pci_dev *pdev, const struct pci_device_id *pre) | |||
4446 | if (!img[i].is_valid) | 4574 | if (!img[i].is_valid) |
4447 | break; | 4575 | break; |
4448 | vxge_debug_init(VXGE_TRACE, "%s: EPROM %d, version " | 4576 | vxge_debug_init(VXGE_TRACE, "%s: EPROM %d, version " |
4449 | "%d.%d.%d.%d\n", VXGE_DRIVER_NAME, i, | 4577 | "%d.%d.%d.%d", VXGE_DRIVER_NAME, i, |
4450 | VXGE_EPROM_IMG_MAJOR(img[i].version), | 4578 | VXGE_EPROM_IMG_MAJOR(img[i].version), |
4451 | VXGE_EPROM_IMG_MINOR(img[i].version), | 4579 | VXGE_EPROM_IMG_MINOR(img[i].version), |
4452 | VXGE_EPROM_IMG_FIX(img[i].version), | 4580 | VXGE_EPROM_IMG_FIX(img[i].version), |
@@ -4643,8 +4771,9 @@ _exit6: | |||
4643 | _exit5: | 4771 | _exit5: |
4644 | vxge_device_unregister(hldev); | 4772 | vxge_device_unregister(hldev); |
4645 | _exit4: | 4773 | _exit4: |
4646 | pci_disable_sriov(pdev); | 4774 | pci_set_drvdata(pdev, NULL); |
4647 | vxge_hw_device_terminate(hldev); | 4775 | vxge_hw_device_terminate(hldev); |
4776 | pci_disable_sriov(pdev); | ||
4648 | _exit3: | 4777 | _exit3: |
4649 | iounmap(attr.bar0); | 4778 | iounmap(attr.bar0); |
4650 | _exit2: | 4779 | _exit2: |
@@ -4655,7 +4784,7 @@ _exit0: | |||
4655 | kfree(ll_config); | 4784 | kfree(ll_config); |
4656 | kfree(device_config); | 4785 | kfree(device_config); |
4657 | driver_config->config_dev_cnt--; | 4786 | driver_config->config_dev_cnt--; |
4658 | pci_set_drvdata(pdev, NULL); | 4787 | driver_config->total_dev_cnt--; |
4659 | return ret; | 4788 | return ret; |
4660 | } | 4789 | } |
4661 | 4790 | ||
@@ -4668,45 +4797,34 @@ _exit0: | |||
4668 | static void __devexit vxge_remove(struct pci_dev *pdev) | 4797 | static void __devexit vxge_remove(struct pci_dev *pdev) |
4669 | { | 4798 | { |
4670 | struct __vxge_hw_device *hldev; | 4799 | struct __vxge_hw_device *hldev; |
4671 | struct vxgedev *vdev = NULL; | 4800 | struct vxgedev *vdev; |
4672 | struct net_device *dev; | 4801 | int i; |
4673 | int i = 0; | ||
4674 | 4802 | ||
4675 | hldev = pci_get_drvdata(pdev); | 4803 | hldev = pci_get_drvdata(pdev); |
4676 | |||
4677 | if (hldev == NULL) | 4804 | if (hldev == NULL) |
4678 | return; | 4805 | return; |
4679 | 4806 | ||
4680 | dev = hldev->ndev; | 4807 | vdev = netdev_priv(hldev->ndev); |
4681 | vdev = netdev_priv(dev); | ||
4682 | 4808 | ||
4683 | vxge_debug_entryexit(vdev->level_trace, "%s:%d", __func__, __LINE__); | 4809 | vxge_debug_entryexit(vdev->level_trace, "%s:%d", __func__, __LINE__); |
4684 | |||
4685 | vxge_debug_init(vdev->level_trace, "%s : removing PCI device...", | 4810 | vxge_debug_init(vdev->level_trace, "%s : removing PCI device...", |
4686 | __func__); | 4811 | __func__); |
4687 | vxge_device_unregister(hldev); | ||
4688 | 4812 | ||
4689 | for (i = 0; i < vdev->no_of_vpath; i++) { | 4813 | for (i = 0; i < vdev->no_of_vpath; i++) |
4690 | vxge_free_mac_add_list(&vdev->vpaths[i]); | 4814 | vxge_free_mac_add_list(&vdev->vpaths[i]); |
4691 | vdev->vpaths[i].mcast_addr_cnt = 0; | ||
4692 | vdev->vpaths[i].mac_addr_cnt = 0; | ||
4693 | } | ||
4694 | |||
4695 | kfree(vdev->vpaths); | ||
4696 | 4815 | ||
4816 | vxge_device_unregister(hldev); | ||
4817 | pci_set_drvdata(pdev, NULL); | ||
4818 | /* Do not call pci_disable_sriov here, as it will break child devices */ | ||
4819 | vxge_hw_device_terminate(hldev); | ||
4697 | iounmap(vdev->bar0); | 4820 | iounmap(vdev->bar0); |
4698 | 4821 | pci_release_region(pdev, 0); | |
4699 | /* we are safe to free it now */ | 4822 | pci_disable_device(pdev); |
4700 | free_netdev(dev); | 4823 | driver_config->config_dev_cnt--; |
4824 | driver_config->total_dev_cnt--; | ||
4701 | 4825 | ||
4702 | vxge_debug_init(vdev->level_trace, "%s:%d Device unregistered", | 4826 | vxge_debug_init(vdev->level_trace, "%s:%d Device unregistered", |
4703 | __func__, __LINE__); | 4827 | __func__, __LINE__); |
4704 | |||
4705 | vxge_hw_device_terminate(hldev); | ||
4706 | |||
4707 | pci_disable_device(pdev); | ||
4708 | pci_release_region(pdev, 0); | ||
4709 | pci_set_drvdata(pdev, NULL); | ||
4710 | vxge_debug_entryexit(vdev->level_trace, "%s:%d Exiting...", __func__, | 4828 | vxge_debug_entryexit(vdev->level_trace, "%s:%d Exiting...", __func__, |
4711 | __LINE__); | 4829 | __LINE__); |
4712 | } | 4830 | } |
diff --git a/drivers/net/vxge/vxge-main.h b/drivers/net/vxge/vxge-main.h index 5746fedc356f..40474f0da576 100644 --- a/drivers/net/vxge/vxge-main.h +++ b/drivers/net/vxge/vxge-main.h | |||
@@ -59,11 +59,13 @@ | |||
59 | #define VXGE_TTI_LTIMER_VAL 1000 | 59 | #define VXGE_TTI_LTIMER_VAL 1000 |
60 | #define VXGE_T1A_TTI_LTIMER_VAL 80 | 60 | #define VXGE_T1A_TTI_LTIMER_VAL 80 |
61 | #define VXGE_TTI_RTIMER_VAL 0 | 61 | #define VXGE_TTI_RTIMER_VAL 0 |
62 | #define VXGE_TTI_RTIMER_ADAPT_VAL 10 | ||
62 | #define VXGE_T1A_TTI_RTIMER_VAL 400 | 63 | #define VXGE_T1A_TTI_RTIMER_VAL 400 |
63 | #define VXGE_RTI_BTIMER_VAL 250 | 64 | #define VXGE_RTI_BTIMER_VAL 250 |
64 | #define VXGE_RTI_LTIMER_VAL 100 | 65 | #define VXGE_RTI_LTIMER_VAL 100 |
65 | #define VXGE_RTI_RTIMER_VAL 0 | 66 | #define VXGE_RTI_RTIMER_VAL 0 |
66 | #define VXGE_FIFO_INDICATE_MAX_PKTS VXGE_DEF_FIFO_LENGTH | 67 | #define VXGE_RTI_RTIMER_ADAPT_VAL 15 |
68 | #define VXGE_FIFO_INDICATE_MAX_PKTS VXGE_DEF_FIFO_LENGTH | ||
67 | #define VXGE_ISR_POLLING_CNT 8 | 69 | #define VXGE_ISR_POLLING_CNT 8 |
68 | #define VXGE_MAX_CONFIG_DEV 0xFF | 70 | #define VXGE_MAX_CONFIG_DEV 0xFF |
69 | #define VXGE_EXEC_MODE_DISABLE 0 | 71 | #define VXGE_EXEC_MODE_DISABLE 0 |
@@ -107,6 +109,14 @@ | |||
107 | #define RTI_T1A_RX_UFC_C 50 | 109 | #define RTI_T1A_RX_UFC_C 50 |
108 | #define RTI_T1A_RX_UFC_D 60 | 110 | #define RTI_T1A_RX_UFC_D 60 |
109 | 111 | ||
112 | /* | ||
113 | * The interrupt rate is maintained at 3k per second with the moderation | ||
114 | * parameters for most traffic but not all. This is the maximum interrupt | ||
115 | * count allowed per function with INTA or per vector in the case of | ||
116 | * MSI-X in a 10 millisecond time period. Enabled only for Titan 1A. | ||
117 | */ | ||
118 | #define VXGE_T1A_MAX_INTERRUPT_COUNT 100 | ||
119 | #define VXGE_T1A_MAX_TX_INTERRUPT_COUNT 200 | ||
110 | 120 | ||
111 | /* Milli secs timer period */ | 121 | /* Milli secs timer period */ |
112 | #define VXGE_TIMER_DELAY 10000 | 122 | #define VXGE_TIMER_DELAY 10000 |
@@ -247,6 +257,11 @@ struct vxge_fifo { | |||
247 | int tx_steering_type; | 257 | int tx_steering_type; |
248 | int indicate_max_pkts; | 258 | int indicate_max_pkts; |
249 | 259 | ||
260 | /* Adaptive interrupt moderation parameters used in T1A */ | ||
261 | unsigned long interrupt_count; | ||
262 | unsigned long jiffies; | ||
263 | |||
264 | u32 tx_vector_no; | ||
250 | /* Tx stats */ | 265 | /* Tx stats */ |
251 | struct vxge_fifo_stats stats; | 266 | struct vxge_fifo_stats stats; |
252 | } ____cacheline_aligned; | 267 | } ____cacheline_aligned; |
@@ -271,6 +286,10 @@ struct vxge_ring { | |||
271 | */ | 286 | */ |
272 | int driver_id; | 287 | int driver_id; |
273 | 288 | ||
289 | /* Adaptive interrupt moderation parameters used in T1A */ | ||
290 | unsigned long interrupt_count; | ||
291 | unsigned long jiffies; | ||
292 | |||
274 | /* copy of the flag indicating whether rx_csum is to be used */ | 293 | /* copy of the flag indicating whether rx_csum is to be used */ |
275 | u32 rx_csum:1, | 294 | u32 rx_csum:1, |
276 | rx_hwts:1; | 295 | rx_hwts:1; |
@@ -286,7 +305,7 @@ struct vxge_ring { | |||
286 | 305 | ||
287 | int vlan_tag_strip; | 306 | int vlan_tag_strip; |
288 | struct vlan_group *vlgrp; | 307 | struct vlan_group *vlgrp; |
289 | int rx_vector_no; | 308 | u32 rx_vector_no; |
290 | enum vxge_hw_status last_status; | 309 | enum vxge_hw_status last_status; |
291 | 310 | ||
292 | /* Rx stats */ | 311 | /* Rx stats */ |
diff --git a/drivers/net/vxge/vxge-traffic.c b/drivers/net/vxge/vxge-traffic.c index 4c10d6c4075f..8674f331311c 100644 --- a/drivers/net/vxge/vxge-traffic.c +++ b/drivers/net/vxge/vxge-traffic.c | |||
@@ -218,6 +218,68 @@ exit: | |||
218 | return status; | 218 | return status; |
219 | } | 219 | } |
220 | 220 | ||
221 | void vxge_hw_vpath_tti_ci_set(struct __vxge_hw_fifo *fifo) | ||
222 | { | ||
223 | struct vxge_hw_vpath_reg __iomem *vp_reg; | ||
224 | struct vxge_hw_vp_config *config; | ||
225 | u64 val64; | ||
226 | |||
227 | if (fifo->config->enable != VXGE_HW_FIFO_ENABLE) | ||
228 | return; | ||
229 | |||
230 | vp_reg = fifo->vp_reg; | ||
231 | config = container_of(fifo->config, struct vxge_hw_vp_config, fifo); | ||
232 | |||
233 | if (config->tti.timer_ci_en != VXGE_HW_TIM_TIMER_CI_ENABLE) { | ||
234 | config->tti.timer_ci_en = VXGE_HW_TIM_TIMER_CI_ENABLE; | ||
235 | val64 = readq(&vp_reg->tim_cfg1_int_num[VXGE_HW_VPATH_INTR_TX]); | ||
236 | val64 |= VXGE_HW_TIM_CFG1_INT_NUM_TIMER_CI; | ||
237 | fifo->tim_tti_cfg1_saved = val64; | ||
238 | writeq(val64, &vp_reg->tim_cfg1_int_num[VXGE_HW_VPATH_INTR_TX]); | ||
239 | } | ||
240 | } | ||
241 | |||
242 | void vxge_hw_vpath_dynamic_rti_ci_set(struct __vxge_hw_ring *ring) | ||
243 | { | ||
244 | u64 val64 = ring->tim_rti_cfg1_saved; | ||
245 | |||
246 | val64 |= VXGE_HW_TIM_CFG1_INT_NUM_TIMER_CI; | ||
247 | ring->tim_rti_cfg1_saved = val64; | ||
248 | writeq(val64, &ring->vp_reg->tim_cfg1_int_num[VXGE_HW_VPATH_INTR_RX]); | ||
249 | } | ||
250 | |||
251 | void vxge_hw_vpath_dynamic_tti_rtimer_set(struct __vxge_hw_fifo *fifo) | ||
252 | { | ||
253 | u64 val64 = fifo->tim_tti_cfg3_saved; | ||
254 | u64 timer = (fifo->rtimer * 1000) / 272; | ||
255 | |||
256 | val64 &= ~VXGE_HW_TIM_CFG3_INT_NUM_RTIMER_VAL(0x3ffffff); | ||
257 | if (timer) | ||
258 | val64 |= VXGE_HW_TIM_CFG3_INT_NUM_RTIMER_VAL(timer) | | ||
259 | VXGE_HW_TIM_CFG3_INT_NUM_RTIMER_EVENT_SF(5); | ||
260 | |||
261 | writeq(val64, &fifo->vp_reg->tim_cfg3_int_num[VXGE_HW_VPATH_INTR_TX]); | ||
262 | /* tti_cfg3_saved is not updated again because it is | ||
263 | * initialized at one place only - init time. | ||
264 | */ | ||
265 | } | ||
266 | |||
267 | void vxge_hw_vpath_dynamic_rti_rtimer_set(struct __vxge_hw_ring *ring) | ||
268 | { | ||
269 | u64 val64 = ring->tim_rti_cfg3_saved; | ||
270 | u64 timer = (ring->rtimer * 1000) / 272; | ||
271 | |||
272 | val64 &= ~VXGE_HW_TIM_CFG3_INT_NUM_RTIMER_VAL(0x3ffffff); | ||
273 | if (timer) | ||
274 | val64 |= VXGE_HW_TIM_CFG3_INT_NUM_RTIMER_VAL(timer) | | ||
275 | VXGE_HW_TIM_CFG3_INT_NUM_RTIMER_EVENT_SF(4); | ||
276 | |||
277 | writeq(val64, &ring->vp_reg->tim_cfg3_int_num[VXGE_HW_VPATH_INTR_RX]); | ||
278 | /* rti_cfg3_saved is not updated again because it is | ||
279 | * initialized at one place only - init time. | ||
280 | */ | ||
281 | } | ||
282 | |||
221 | /** | 283 | /** |
222 | * vxge_hw_channel_msix_mask - Mask MSIX Vector. | 284 | * vxge_hw_channel_msix_mask - Mask MSIX Vector. |
223 | * @channeh: Channel for rx or tx handle | 285 | * @channeh: Channel for rx or tx handle |
@@ -254,6 +316,23 @@ vxge_hw_channel_msix_unmask(struct __vxge_hw_channel *channel, int msix_id) | |||
254 | } | 316 | } |
255 | 317 | ||
256 | /** | 318 | /** |
319 | * vxge_hw_channel_msix_clear - Clear the MSIX one-shot vector. | ||
320 | * @channel: Channel for rx or tx handle | ||
321 | * @msix_id: MSI ID | ||
322 | * | ||
323 | * The function clears the msix interrupt for the given msix_id by writing | ||
324 | * its bit to clr_msix_one_shot_vec; intended for MSIX oneshot mode | ||
325 | * | ||
326 | * Returns: nothing | ||
327 | */ | ||
328 | void vxge_hw_channel_msix_clear(struct __vxge_hw_channel *channel, int msix_id) | ||
329 | { | ||
330 | __vxge_hw_pio_mem_write32_upper( | ||
331 | (u32) vxge_bVALn(vxge_mBIT(msix_id >> 2), 0, 32), | ||
332 | &channel->common_reg->clr_msix_one_shot_vec[msix_id % 4]); | ||
333 | } | ||
334 | |||
335 | /** | ||
257 | * vxge_hw_device_set_intr_type - Updates the configuration | 336 | * vxge_hw_device_set_intr_type - Updates the configuration |
258 | * with new interrupt type. | 337 | * with new interrupt type. |
259 | * @hldev: HW device handle. | 338 | * @hldev: HW device handle. |
@@ -2191,19 +2270,14 @@ vxge_hw_vpath_msix_set(struct __vxge_hw_vpath_handle *vp, int *tim_msix_id, | |||
2191 | if (vpath->hldev->config.intr_mode == | 2270 | if (vpath->hldev->config.intr_mode == |
2192 | VXGE_HW_INTR_MODE_MSIX_ONE_SHOT) { | 2271 | VXGE_HW_INTR_MODE_MSIX_ONE_SHOT) { |
2193 | __vxge_hw_pio_mem_write32_upper((u32)vxge_bVALn( | 2272 | __vxge_hw_pio_mem_write32_upper((u32)vxge_bVALn( |
2273 | VXGE_HW_ONE_SHOT_VECT0_EN_ONE_SHOT_VECT0_EN, | ||
2274 | 0, 32), &vp_reg->one_shot_vect0_en); | ||
2275 | __vxge_hw_pio_mem_write32_upper((u32)vxge_bVALn( | ||
2194 | VXGE_HW_ONE_SHOT_VECT1_EN_ONE_SHOT_VECT1_EN, | 2276 | VXGE_HW_ONE_SHOT_VECT1_EN_ONE_SHOT_VECT1_EN, |
2195 | 0, 32), &vp_reg->one_shot_vect1_en); | 2277 | 0, 32), &vp_reg->one_shot_vect1_en); |
2196 | } | ||
2197 | |||
2198 | if (vpath->hldev->config.intr_mode == | ||
2199 | VXGE_HW_INTR_MODE_MSIX_ONE_SHOT) { | ||
2200 | __vxge_hw_pio_mem_write32_upper((u32)vxge_bVALn( | 2278 | __vxge_hw_pio_mem_write32_upper((u32)vxge_bVALn( |
2201 | VXGE_HW_ONE_SHOT_VECT2_EN_ONE_SHOT_VECT2_EN, | 2279 | VXGE_HW_ONE_SHOT_VECT2_EN_ONE_SHOT_VECT2_EN, |
2202 | 0, 32), &vp_reg->one_shot_vect2_en); | 2280 | 0, 32), &vp_reg->one_shot_vect2_en); |
2203 | |||
2204 | __vxge_hw_pio_mem_write32_upper((u32)vxge_bVALn( | ||
2205 | VXGE_HW_ONE_SHOT_VECT3_EN_ONE_SHOT_VECT3_EN, | ||
2206 | 0, 32), &vp_reg->one_shot_vect3_en); | ||
2207 | } | 2281 | } |
2208 | } | 2282 | } |
2209 | 2283 | ||
@@ -2229,6 +2303,32 @@ vxge_hw_vpath_msix_mask(struct __vxge_hw_vpath_handle *vp, int msix_id) | |||
2229 | } | 2303 | } |
2230 | 2304 | ||
2231 | /** | 2305 | /** |
2306 | * vxge_hw_vpath_msix_clear - Clear MSIX Vector. | ||
2307 | * @vp: Virtual Path handle. | ||
2308 | * @msix_id: MSI ID | ||
2309 | * | ||
2310 | * The function clears the msix interrupt for the given msix_id | ||
2311 | * | ||
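2312 | * Returns: nothing. In MSIX one-shot mode the vector's bit is written | ||
2313 | * to clr_msix_one_shot_vec; otherwise it is written to | ||
2314 | * clear_msix_mask_vect. | ||
2315 | * See also: vxge_hw_vpath_msix_mask(), vxge_hw_vpath_msix_unmask() | ||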
2316 | */ | ||
2317 | void vxge_hw_vpath_msix_clear(struct __vxge_hw_vpath_handle *vp, int msix_id) | ||
2318 | { | ||
2319 | struct __vxge_hw_device *hldev = vp->vpath->hldev; | ||
2320 | |||
2321 | if ((hldev->config.intr_mode == VXGE_HW_INTR_MODE_MSIX_ONE_SHOT)) | ||
2322 | __vxge_hw_pio_mem_write32_upper( | ||
2323 | (u32) vxge_bVALn(vxge_mBIT((msix_id >> 2)), 0, 32), | ||
2324 | &hldev->common_reg->clr_msix_one_shot_vec[msix_id % 4]); | ||
2325 | else | ||
2326 | __vxge_hw_pio_mem_write32_upper( | ||
2327 | (u32) vxge_bVALn(vxge_mBIT((msix_id >> 2)), 0, 32), | ||
2328 | &hldev->common_reg->clear_msix_mask_vect[msix_id % 4]); | ||
2329 | } | ||
2330 | |||
2331 | /** | ||
2232 | * vxge_hw_vpath_msix_unmask - Unmask the MSIX Vector. | 2332 | * vxge_hw_vpath_msix_unmask - Unmask the MSIX Vector. |
2233 | * @vp: Virtual Path handle. | 2333 | * @vp: Virtual Path handle. |
2234 | * @msix_id: MSI ID | 2334 | * @msix_id: MSI ID |
diff --git a/drivers/net/vxge/vxge-traffic.h b/drivers/net/vxge/vxge-traffic.h index d48486d6afa1..9d9dfda4c7ab 100644 --- a/drivers/net/vxge/vxge-traffic.h +++ b/drivers/net/vxge/vxge-traffic.h | |||
@@ -2142,6 +2142,10 @@ void vxge_hw_device_clear_tx_rx( | |||
2142 | * Virtual Paths | 2142 | * Virtual Paths |
2143 | */ | 2143 | */ |
2144 | 2144 | ||
2145 | void vxge_hw_vpath_dynamic_rti_rtimer_set(struct __vxge_hw_ring *ring); | ||
2146 | |||
2147 | void vxge_hw_vpath_dynamic_tti_rtimer_set(struct __vxge_hw_fifo *fifo); | ||
2148 | |||
2145 | u32 vxge_hw_vpath_id( | 2149 | u32 vxge_hw_vpath_id( |
2146 | struct __vxge_hw_vpath_handle *vpath_handle); | 2150 | struct __vxge_hw_vpath_handle *vpath_handle); |
2147 | 2151 | ||
@@ -2245,6 +2249,8 @@ void | |||
2245 | vxge_hw_vpath_msix_mask(struct __vxge_hw_vpath_handle *vpath_handle, | 2249 | vxge_hw_vpath_msix_mask(struct __vxge_hw_vpath_handle *vpath_handle, |
2246 | int msix_id); | 2250 | int msix_id); |
2247 | 2251 | ||
2252 | void vxge_hw_vpath_msix_clear(struct __vxge_hw_vpath_handle *vp, int msix_id); | ||
2253 | |||
2248 | void vxge_hw_device_flush_io(struct __vxge_hw_device *devh); | 2254 | void vxge_hw_device_flush_io(struct __vxge_hw_device *devh); |
2249 | 2255 | ||
2250 | void | 2256 | void |
@@ -2270,6 +2276,9 @@ void | |||
2270 | vxge_hw_channel_msix_unmask(struct __vxge_hw_channel *channelh, int msix_id); | 2276 | vxge_hw_channel_msix_unmask(struct __vxge_hw_channel *channelh, int msix_id); |
2271 | 2277 | ||
2272 | void | 2278 | void |
2279 | vxge_hw_channel_msix_clear(struct __vxge_hw_channel *channelh, int msix_id); | ||
2280 | |||
2281 | void | ||
2273 | vxge_hw_channel_dtr_try_complete(struct __vxge_hw_channel *channel, | 2282 | vxge_hw_channel_dtr_try_complete(struct __vxge_hw_channel *channel, |
2274 | void **dtrh); | 2283 | void **dtrh); |
2275 | 2284 | ||
@@ -2282,7 +2291,8 @@ vxge_hw_channel_dtr_free(struct __vxge_hw_channel *channel, void *dtrh); | |||
2282 | int | 2291 | int |
2283 | vxge_hw_channel_dtr_count(struct __vxge_hw_channel *channel); | 2292 | vxge_hw_channel_dtr_count(struct __vxge_hw_channel *channel); |
2284 | 2293 | ||
2285 | void | 2294 | void vxge_hw_vpath_tti_ci_set(struct __vxge_hw_fifo *fifo); |
2286 | vxge_hw_vpath_tti_ci_set(struct __vxge_hw_device *hldev, u32 vp_id); | 2295 | |
2296 | void vxge_hw_vpath_dynamic_rti_ci_set(struct __vxge_hw_ring *ring); | ||
2287 | 2297 | ||
2288 | #endif | 2298 | #endif |
diff --git a/drivers/net/vxge/vxge-version.h b/drivers/net/vxge/vxge-version.h index ad2f99b9bcf3..581e21525e85 100644 --- a/drivers/net/vxge/vxge-version.h +++ b/drivers/net/vxge/vxge-version.h | |||
@@ -16,8 +16,8 @@ | |||
16 | 16 | ||
17 | #define VXGE_VERSION_MAJOR "2" | 17 | #define VXGE_VERSION_MAJOR "2" |
18 | #define VXGE_VERSION_MINOR "5" | 18 | #define VXGE_VERSION_MINOR "5" |
19 | #define VXGE_VERSION_FIX "1" | 19 | #define VXGE_VERSION_FIX "2" |
20 | #define VXGE_VERSION_BUILD "22082" | 20 | #define VXGE_VERSION_BUILD "22259" |
21 | #define VXGE_VERSION_FOR "k" | 21 | #define VXGE_VERSION_FOR "k" |
22 | 22 | ||
23 | #define VXGE_FW_VER(maj, min, bld) (((maj) << 16) + ((min) << 8) + (bld)) | 23 | #define VXGE_FW_VER(maj, min, bld) (((maj) << 16) + ((min) << 8) + (bld)) |
diff --git a/include/linux/audit.h b/include/linux/audit.h index 359df0487690..9d339eb27881 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h | |||
@@ -103,6 +103,8 @@ | |||
103 | #define AUDIT_BPRM_FCAPS 1321 /* Information about fcaps increasing perms */ | 103 | #define AUDIT_BPRM_FCAPS 1321 /* Information about fcaps increasing perms */ |
104 | #define AUDIT_CAPSET 1322 /* Record showing argument to sys_capset */ | 104 | #define AUDIT_CAPSET 1322 /* Record showing argument to sys_capset */ |
105 | #define AUDIT_MMAP 1323 /* Record showing descriptor and flags in mmap */ | 105 | #define AUDIT_MMAP 1323 /* Record showing descriptor and flags in mmap */ |
106 | #define AUDIT_NETFILTER_PKT 1324 /* Packets traversing netfilter chains */ | ||
107 | #define AUDIT_NETFILTER_CFG 1325 /* Netfilter chain modifications */ | ||
106 | 108 | ||
107 | #define AUDIT_AVC 1400 /* SE Linux avc denial or grant */ | 109 | #define AUDIT_AVC 1400 /* SE Linux avc denial or grant */ |
108 | #define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */ | 110 | #define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */ |
diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 010e2d87ed75..d638e85dc501 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h | |||
@@ -279,8 +279,6 @@ enum dccp_state { | |||
279 | DCCP_MAX_STATES | 279 | DCCP_MAX_STATES |
280 | }; | 280 | }; |
281 | 281 | ||
282 | #define DCCP_STATE_MASK 0x1f | ||
283 | |||
284 | enum { | 282 | enum { |
285 | DCCPF_OPEN = TCPF_ESTABLISHED, | 283 | DCCPF_OPEN = TCPF_ESTABLISHED, |
286 | DCCPF_REQUESTING = TCPF_SYN_SENT, | 284 | DCCPF_REQUESTING = TCPF_SYN_SENT, |
diff --git a/include/linux/if_link.h b/include/linux/if_link.h index 6485d2a89bec..f4a2e6b1b864 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h | |||
@@ -135,6 +135,7 @@ enum { | |||
135 | IFLA_VF_PORTS, | 135 | IFLA_VF_PORTS, |
136 | IFLA_PORT_SELF, | 136 | IFLA_PORT_SELF, |
137 | IFLA_AF_SPEC, | 137 | IFLA_AF_SPEC, |
138 | IFLA_GROUP, /* Group the device belongs to */ | ||
138 | __IFLA_MAX | 139 | __IFLA_MAX |
139 | }; | 140 | }; |
140 | 141 | ||
diff --git a/include/linux/ip_vs.h b/include/linux/ip_vs.h index 5f43a3b2e3ad..4deb3834d62c 100644 --- a/include/linux/ip_vs.h +++ b/include/linux/ip_vs.h | |||
@@ -89,6 +89,14 @@ | |||
89 | #define IP_VS_CONN_F_TEMPLATE 0x1000 /* template, not connection */ | 89 | #define IP_VS_CONN_F_TEMPLATE 0x1000 /* template, not connection */ |
90 | #define IP_VS_CONN_F_ONE_PACKET 0x2000 /* forward only one packet */ | 90 | #define IP_VS_CONN_F_ONE_PACKET 0x2000 /* forward only one packet */ |
91 | 91 | ||
92 | #define IP_VS_CONN_F_BACKUP_MASK (IP_VS_CONN_F_FWD_MASK | \ | ||
93 | IP_VS_CONN_F_NOOUTPUT | \ | ||
94 | IP_VS_CONN_F_INACTIVE | \ | ||
95 | IP_VS_CONN_F_SEQ_MASK | \ | ||
96 | IP_VS_CONN_F_NO_CPORT | \ | ||
97 | IP_VS_CONN_F_TEMPLATE \ | ||
98 | ) | ||
99 | |||
92 | /* Flags that are not sent to backup server start from bit 16 */ | 100 | /* Flags that are not sent to backup server start from bit 16 */ |
93 | #define IP_VS_CONN_F_NFCT (1 << 16) /* use netfilter conntrack */ | 101 | #define IP_VS_CONN_F_NFCT (1 << 16) /* use netfilter conntrack */ |
94 | 102 | ||
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d971346b0340..371fa8839d51 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h | |||
@@ -75,6 +75,9 @@ struct wireless_dev; | |||
75 | #define NET_RX_SUCCESS 0 /* keep 'em coming, baby */ | 75 | #define NET_RX_SUCCESS 0 /* keep 'em coming, baby */ |
76 | #define NET_RX_DROP 1 /* packet dropped */ | 76 | #define NET_RX_DROP 1 /* packet dropped */ |
77 | 77 | ||
78 | /* Initial net device group. All devices belong to group 0 by default. */ | ||
79 | #define INIT_NETDEV_GROUP 0 | ||
80 | |||
78 | /* | 81 | /* |
79 | * Transmit return codes: transmit return codes originate from three different | 82 | * Transmit return codes: transmit return codes originate from three different |
80 | * namespaces: | 83 | * namespaces: |
@@ -643,6 +646,14 @@ struct xps_dev_maps { | |||
643 | (nr_cpu_ids * sizeof(struct xps_map *))) | 646 | (nr_cpu_ids * sizeof(struct xps_map *))) |
644 | #endif /* CONFIG_XPS */ | 647 | #endif /* CONFIG_XPS */ |
645 | 648 | ||
649 | #define TC_MAX_QUEUE 16 | ||
650 | #define TC_BITMASK 15 | ||
651 | /* HW offloaded queuing disciplines txq count and offset maps */ | ||
652 | struct netdev_tc_txq { | ||
653 | u16 count; | ||
654 | u16 offset; | ||
655 | }; | ||
656 | |||
646 | /* | 657 | /* |
647 | * This structure defines the management hooks for network devices. | 658 | * This structure defines the management hooks for network devices. |
648 | * The following hooks can be defined; unless noted otherwise, they are | 659 | * The following hooks can be defined; unless noted otherwise, they are |
@@ -753,6 +764,11 @@ struct xps_dev_maps { | |||
753 | * int (*ndo_set_vf_port)(struct net_device *dev, int vf, | 764 | * int (*ndo_set_vf_port)(struct net_device *dev, int vf, |
754 | * struct nlattr *port[]); | 765 | * struct nlattr *port[]); |
755 | * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb); | 766 | * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb); |
767 | * int (*ndo_setup_tc)(struct net_device *dev, u8 tc) | ||
768 | * Called to setup 'tc' number of traffic classes in the net device. This | ||
769 | * is always called from the stack with the rtnl lock held and netif tx | ||
770 | * queues stopped. This allows the netdevice to perform queue management | ||
771 | * safely. | ||
756 | */ | 772 | */ |
757 | #define HAVE_NET_DEVICE_OPS | 773 | #define HAVE_NET_DEVICE_OPS |
758 | struct net_device_ops { | 774 | struct net_device_ops { |
@@ -811,6 +827,7 @@ struct net_device_ops { | |||
811 | struct nlattr *port[]); | 827 | struct nlattr *port[]); |
812 | int (*ndo_get_vf_port)(struct net_device *dev, | 828 | int (*ndo_get_vf_port)(struct net_device *dev, |
813 | int vf, struct sk_buff *skb); | 829 | int vf, struct sk_buff *skb); |
830 | int (*ndo_setup_tc)(struct net_device *dev, u8 tc); | ||
814 | #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) | 831 | #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) |
815 | int (*ndo_fcoe_enable)(struct net_device *dev); | 832 | int (*ndo_fcoe_enable)(struct net_device *dev); |
816 | int (*ndo_fcoe_disable)(struct net_device *dev); | 833 | int (*ndo_fcoe_disable)(struct net_device *dev); |
@@ -1143,6 +1160,9 @@ struct net_device { | |||
1143 | /* Data Center Bridging netlink ops */ | 1160 | /* Data Center Bridging netlink ops */ |
1144 | const struct dcbnl_rtnl_ops *dcbnl_ops; | 1161 | const struct dcbnl_rtnl_ops *dcbnl_ops; |
1145 | #endif | 1162 | #endif |
1163 | u8 num_tc; | ||
1164 | struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE]; | ||
1165 | u8 prio_tc_map[TC_BITMASK + 1]; | ||
1146 | 1166 | ||
1147 | #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) | 1167 | #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) |
1148 | /* max exchange id for FCoE LRO by ddp */ | 1168 | /* max exchange id for FCoE LRO by ddp */ |
@@ -1153,12 +1173,66 @@ struct net_device { | |||
1153 | 1173 | ||
1154 | /* phy device may attach itself for hardware timestamping */ | 1174 | /* phy device may attach itself for hardware timestamping */ |
1155 | struct phy_device *phydev; | 1175 | struct phy_device *phydev; |
1176 | |||
1177 | /* group the device belongs to */ | ||
1178 | int group; | ||
1156 | }; | 1179 | }; |
1157 | #define to_net_dev(d) container_of(d, struct net_device, dev) | 1180 | #define to_net_dev(d) container_of(d, struct net_device, dev) |
1158 | 1181 | ||
1159 | #define NETDEV_ALIGN 32 | 1182 | #define NETDEV_ALIGN 32 |
1160 | 1183 | ||
1161 | static inline | 1184 | static inline |
1185 | int netdev_get_prio_tc_map(const struct net_device *dev, u32 prio) | ||
1186 | { | ||
1187 | return dev->prio_tc_map[prio & TC_BITMASK]; | ||
1188 | } | ||
1189 | |||
1190 | static inline | ||
1191 | int netdev_set_prio_tc_map(struct net_device *dev, u8 prio, u8 tc) | ||
1192 | { | ||
1193 | if (tc >= dev->num_tc) | ||
1194 | return -EINVAL; | ||
1195 | |||
1196 | dev->prio_tc_map[prio & TC_BITMASK] = tc & TC_BITMASK; | ||
1197 | return 0; | ||
1198 | } | ||
1199 | |||
1200 | static inline | ||
1201 | void netdev_reset_tc(struct net_device *dev) | ||
1202 | { | ||
1203 | dev->num_tc = 0; | ||
1204 | memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq)); | ||
1205 | memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map)); | ||
1206 | } | ||
1207 | |||
1208 | static inline | ||
1209 | int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset) | ||
1210 | { | ||
1211 | if (tc >= dev->num_tc) | ||
1212 | return -EINVAL; | ||
1213 | |||
1214 | dev->tc_to_txq[tc].count = count; | ||
1215 | dev->tc_to_txq[tc].offset = offset; | ||
1216 | return 0; | ||
1217 | } | ||
1218 | |||
1219 | static inline | ||
1220 | int netdev_set_num_tc(struct net_device *dev, u8 num_tc) | ||
1221 | { | ||
1222 | if (num_tc > TC_MAX_QUEUE) | ||
1223 | return -EINVAL; | ||
1224 | |||
1225 | dev->num_tc = num_tc; | ||
1226 | return 0; | ||
1227 | } | ||
1228 | |||
1229 | static inline | ||
1230 | int netdev_get_num_tc(struct net_device *dev) | ||
1231 | { | ||
1232 | return dev->num_tc; | ||
1233 | } | ||
1234 | |||
1235 | static inline | ||
1162 | struct netdev_queue *netdev_get_tx_queue(const struct net_device *dev, | 1236 | struct netdev_queue *netdev_get_tx_queue(const struct net_device *dev, |
1163 | unsigned int index) | 1237 | unsigned int index) |
1164 | { | 1238 | { |
@@ -1844,6 +1918,7 @@ extern int dev_set_alias(struct net_device *, const char *, size_t); | |||
1844 | extern int dev_change_net_namespace(struct net_device *, | 1918 | extern int dev_change_net_namespace(struct net_device *, |
1845 | struct net *, const char *); | 1919 | struct net *, const char *); |
1846 | extern int dev_set_mtu(struct net_device *, int); | 1920 | extern int dev_set_mtu(struct net_device *, int); |
1921 | extern void dev_set_group(struct net_device *, int); | ||
1847 | extern int dev_set_mac_address(struct net_device *, | 1922 | extern int dev_set_mac_address(struct net_device *, |
1848 | struct sockaddr *); | 1923 | struct sockaddr *); |
1849 | extern int dev_hard_start_xmit(struct sk_buff *skb, | 1924 | extern int dev_hard_start_xmit(struct sk_buff *skb, |
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 1893837b3966..eeec00abb664 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h | |||
@@ -24,16 +24,20 @@ | |||
24 | #define NF_MAX_VERDICT NF_STOP | 24 | #define NF_MAX_VERDICT NF_STOP |
25 | 25 | ||
26 | /* we overload the higher bits for encoding auxiliary data such as the queue | 26 | /* we overload the higher bits for encoding auxiliary data such as the queue |
27 | * number. Not nice, but better than additional function arguments. */ | 27 | * number or errno values. Not nice, but better than additional function |
28 | #define NF_VERDICT_MASK 0x0000ffff | 28 | * arguments. */ |
29 | #define NF_VERDICT_BITS 16 | 29 | #define NF_VERDICT_MASK 0x000000ff |
30 | |||
31 | /* extra verdict flags have mask 0x0000ff00 */ | ||
32 | #define NF_VERDICT_FLAG_QUEUE_BYPASS 0x00008000 | ||
30 | 33 | ||
34 | /* queue number (NF_QUEUE) or errno (NF_DROP) */ | ||
31 | #define NF_VERDICT_QMASK 0xffff0000 | 35 | #define NF_VERDICT_QMASK 0xffff0000 |
32 | #define NF_VERDICT_QBITS 16 | 36 | #define NF_VERDICT_QBITS 16 |
33 | 37 | ||
34 | #define NF_QUEUE_NR(x) ((((x) << NF_VERDICT_BITS) & NF_VERDICT_QMASK) | NF_QUEUE) | 38 | #define NF_QUEUE_NR(x) ((((x) << 16) & NF_VERDICT_QMASK) | NF_QUEUE) |
35 | 39 | ||
36 | #define NF_DROP_ERR(x) (((-x) << NF_VERDICT_BITS) | NF_DROP) | 40 | #define NF_DROP_ERR(x) (((-x) << 16) | NF_DROP) |
37 | 41 | ||
38 | /* only for userspace compatibility */ | 42 | /* only for userspace compatibility */ |
39 | #ifndef __KERNEL__ | 43 | #ifndef __KERNEL__ |
@@ -41,6 +45,9 @@ | |||
41 | <= 0x2000 is used for protocol-flags. */ | 45 | <= 0x2000 is used for protocol-flags. */ |
42 | #define NFC_UNKNOWN 0x4000 | 46 | #define NFC_UNKNOWN 0x4000 |
43 | #define NFC_ALTERED 0x8000 | 47 | #define NFC_ALTERED 0x8000 |
48 | |||
49 | /* NF_VERDICT_BITS should be 8 now, but userspace might break if this changes */ | ||
50 | #define NF_VERDICT_BITS 16 | ||
44 | #endif | 51 | #endif |
45 | 52 | ||
46 | enum nf_inet_hooks { | 53 | enum nf_inet_hooks { |
@@ -72,6 +79,10 @@ union nf_inet_addr { | |||
72 | 79 | ||
73 | #ifdef __KERNEL__ | 80 | #ifdef __KERNEL__ |
74 | #ifdef CONFIG_NETFILTER | 81 | #ifdef CONFIG_NETFILTER |
82 | static inline int NF_DROP_GETERR(int verdict) | ||
83 | { | ||
84 | return -(verdict >> NF_VERDICT_QBITS); | ||
85 | } | ||
75 | 86 | ||
76 | static inline int nf_inet_addr_cmp(const union nf_inet_addr *a1, | 87 | static inline int nf_inet_addr_cmp(const union nf_inet_addr *a1, |
77 | const union nf_inet_addr *a2) | 88 | const union nf_inet_addr *a2) |
@@ -267,7 +278,7 @@ struct nf_afinfo { | |||
267 | int route_key_size; | 278 | int route_key_size; |
268 | }; | 279 | }; |
269 | 280 | ||
270 | extern const struct nf_afinfo *nf_afinfo[NFPROTO_NUMPROTO]; | 281 | extern const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO]; |
271 | static inline const struct nf_afinfo *nf_get_afinfo(unsigned short family) | 282 | static inline const struct nf_afinfo *nf_get_afinfo(unsigned short family) |
272 | { | 283 | { |
273 | return rcu_dereference(nf_afinfo[family]); | 284 | return rcu_dereference(nf_afinfo[family]); |
@@ -357,9 +368,9 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) | |||
357 | #endif /*CONFIG_NETFILTER*/ | 368 | #endif /*CONFIG_NETFILTER*/ |
358 | 369 | ||
359 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | 370 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) |
360 | extern void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *); | 371 | extern void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *) __rcu; |
361 | extern void nf_ct_attach(struct sk_buff *, struct sk_buff *); | 372 | extern void nf_ct_attach(struct sk_buff *, struct sk_buff *); |
362 | extern void (*nf_ct_destroy)(struct nf_conntrack *); | 373 | extern void (*nf_ct_destroy)(struct nf_conntrack *) __rcu; |
363 | #else | 374 | #else |
364 | static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} | 375 | static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} |
365 | #endif | 376 | #endif |
diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index 9d40effe7ca7..89c0d1e20d72 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild | |||
@@ -9,6 +9,7 @@ header-y += nfnetlink_conntrack.h | |||
9 | header-y += nfnetlink_log.h | 9 | header-y += nfnetlink_log.h |
10 | header-y += nfnetlink_queue.h | 10 | header-y += nfnetlink_queue.h |
11 | header-y += x_tables.h | 11 | header-y += x_tables.h |
12 | header-y += xt_AUDIT.h | ||
12 | header-y += xt_CHECKSUM.h | 13 | header-y += xt_CHECKSUM.h |
13 | header-y += xt_CLASSIFY.h | 14 | header-y += xt_CLASSIFY.h |
14 | header-y += xt_CONNMARK.h | 15 | header-y += xt_CONNMARK.h |
@@ -55,6 +56,7 @@ header-y += xt_rateest.h | |||
55 | header-y += xt_realm.h | 56 | header-y += xt_realm.h |
56 | header-y += xt_recent.h | 57 | header-y += xt_recent.h |
57 | header-y += xt_sctp.h | 58 | header-y += xt_sctp.h |
59 | header-y += xt_socket.h | ||
58 | header-y += xt_state.h | 60 | header-y += xt_state.h |
59 | header-y += xt_statistic.h | 61 | header-y += xt_statistic.h |
60 | header-y += xt_string.h | 62 | header-y += xt_string.h |
diff --git a/include/linux/netfilter/nf_conntrack_snmp.h b/include/linux/netfilter/nf_conntrack_snmp.h new file mode 100644 index 000000000000..064bc63a5346 --- /dev/null +++ b/include/linux/netfilter/nf_conntrack_snmp.h | |||
@@ -0,0 +1,9 @@ | |||
1 | #ifndef _NF_CONNTRACK_SNMP_H | ||
2 | #define _NF_CONNTRACK_SNMP_H | ||
3 | |||
4 | extern int (*nf_nat_snmp_hook)(struct sk_buff *skb, | ||
5 | unsigned int protoff, | ||
6 | struct nf_conn *ct, | ||
7 | enum ip_conntrack_info ctinfo); | ||
8 | |||
9 | #endif /* _NF_CONNTRACK_SNMP_H */ | ||
diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index 19711e3ffd42..debf1aefd753 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h | |||
@@ -42,6 +42,7 @@ enum ctattr_type { | |||
42 | CTA_SECMARK, /* obsolete */ | 42 | CTA_SECMARK, /* obsolete */ |
43 | CTA_ZONE, | 43 | CTA_ZONE, |
44 | CTA_SECCTX, | 44 | CTA_SECCTX, |
45 | CTA_TIMESTAMP, | ||
45 | __CTA_MAX | 46 | __CTA_MAX |
46 | }; | 47 | }; |
47 | #define CTA_MAX (__CTA_MAX - 1) | 48 | #define CTA_MAX (__CTA_MAX - 1) |
@@ -127,6 +128,14 @@ enum ctattr_counters { | |||
127 | }; | 128 | }; |
128 | #define CTA_COUNTERS_MAX (__CTA_COUNTERS_MAX - 1) | 129 | #define CTA_COUNTERS_MAX (__CTA_COUNTERS_MAX - 1) |
129 | 130 | ||
131 | enum ctattr_tstamp { | ||
132 | CTA_TIMESTAMP_UNSPEC, | ||
133 | CTA_TIMESTAMP_START, | ||
134 | CTA_TIMESTAMP_STOP, | ||
135 | __CTA_TIMESTAMP_MAX | ||
136 | }; | ||
137 | #define CTA_TIMESTAMP_MAX (__CTA_TIMESTAMP_MAX - 1) | ||
138 | |||
130 | enum ctattr_nat { | 139 | enum ctattr_nat { |
131 | CTA_NAT_UNSPEC, | 140 | CTA_NAT_UNSPEC, |
132 | CTA_NAT_MINIP, | 141 | CTA_NAT_MINIP, |
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 6712e713b299..37219525ff6f 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h | |||
@@ -611,8 +611,9 @@ struct _compat_xt_align { | |||
611 | extern void xt_compat_lock(u_int8_t af); | 611 | extern void xt_compat_lock(u_int8_t af); |
612 | extern void xt_compat_unlock(u_int8_t af); | 612 | extern void xt_compat_unlock(u_int8_t af); |
613 | 613 | ||
614 | extern int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta); | 614 | extern int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta); |
615 | extern void xt_compat_flush_offsets(u_int8_t af); | 615 | extern void xt_compat_flush_offsets(u_int8_t af); |
616 | extern void xt_compat_init_offsets(u_int8_t af, unsigned int number); | ||
616 | extern int xt_compat_calc_jump(u_int8_t af, unsigned int offset); | 617 | extern int xt_compat_calc_jump(u_int8_t af, unsigned int offset); |
617 | 618 | ||
618 | extern int xt_compat_match_offset(const struct xt_match *match); | 619 | extern int xt_compat_match_offset(const struct xt_match *match); |
diff --git a/include/linux/netfilter/xt_AUDIT.h b/include/linux/netfilter/xt_AUDIT.h new file mode 100644 index 000000000000..38751d2ea52b --- /dev/null +++ b/include/linux/netfilter/xt_AUDIT.h | |||
@@ -0,0 +1,30 @@ | |||
1 | /* | ||
2 | * Header file for iptables xt_AUDIT target | ||
3 | * | ||
4 | * (C) 2010-2011 Thomas Graf <tgraf@redhat.com> | ||
5 | * (C) 2010-2011 Red Hat, Inc. | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | */ | ||
11 | |||
12 | #ifndef _XT_AUDIT_TARGET_H | ||
13 | #define _XT_AUDIT_TARGET_H | ||
14 | |||
15 | #include <linux/types.h> | ||
16 | |||
17 | enum { | ||
18 | XT_AUDIT_TYPE_ACCEPT = 0, | ||
19 | XT_AUDIT_TYPE_DROP, | ||
20 | XT_AUDIT_TYPE_REJECT, | ||
21 | __XT_AUDIT_TYPE_MAX, | ||
22 | }; | ||
23 | |||
24 | #define XT_AUDIT_TYPE_MAX (__XT_AUDIT_TYPE_MAX - 1) | ||
25 | |||
26 | struct xt_audit_info { | ||
27 | __u8 type; /* XT_AUDIT_TYPE_* */ | ||
28 | }; | ||
29 | |||
30 | #endif /* _XT_AUDIT_TARGET_H */ | ||
diff --git a/include/linux/netfilter/xt_CT.h b/include/linux/netfilter/xt_CT.h index 1b564106891d..b56e76811c04 100644 --- a/include/linux/netfilter/xt_CT.h +++ b/include/linux/netfilter/xt_CT.h | |||
@@ -1,14 +1,16 @@ | |||
1 | #ifndef _XT_CT_H | 1 | #ifndef _XT_CT_H |
2 | #define _XT_CT_H | 2 | #define _XT_CT_H |
3 | 3 | ||
4 | #include <linux/types.h> | ||
5 | |||
4 | #define XT_CT_NOTRACK 0x1 | 6 | #define XT_CT_NOTRACK 0x1 |
5 | 7 | ||
6 | struct xt_ct_target_info { | 8 | struct xt_ct_target_info { |
7 | u_int16_t flags; | 9 | __u16 flags; |
8 | u_int16_t zone; | 10 | __u16 zone; |
9 | u_int32_t ct_events; | 11 | __u32 ct_events; |
10 | u_int32_t exp_events; | 12 | __u32 exp_events; |
11 | char helper[16]; | 13 | char helper[16]; |
12 | 14 | ||
13 | /* Used internally by the kernel */ | 15 | /* Used internally by the kernel */ |
14 | struct nf_conn *ct __attribute__((aligned(8))); | 16 | struct nf_conn *ct __attribute__((aligned(8))); |
diff --git a/include/linux/netfilter/xt_NFQUEUE.h b/include/linux/netfilter/xt_NFQUEUE.h index 2584f4a777de..9eafdbbb401c 100644 --- a/include/linux/netfilter/xt_NFQUEUE.h +++ b/include/linux/netfilter/xt_NFQUEUE.h | |||
@@ -20,4 +20,10 @@ struct xt_NFQ_info_v1 { | |||
20 | __u16 queues_total; | 20 | __u16 queues_total; |
21 | }; | 21 | }; |
22 | 22 | ||
23 | struct xt_NFQ_info_v2 { | ||
24 | __u16 queuenum; | ||
25 | __u16 queues_total; | ||
26 | __u16 bypass; | ||
27 | }; | ||
28 | |||
23 | #endif /* _XT_NFQ_TARGET_H */ | 29 | #endif /* _XT_NFQ_TARGET_H */ |
diff --git a/include/linux/netfilter/xt_TCPOPTSTRIP.h b/include/linux/netfilter/xt_TCPOPTSTRIP.h index 2db543214ff5..7157318499c2 100644 --- a/include/linux/netfilter/xt_TCPOPTSTRIP.h +++ b/include/linux/netfilter/xt_TCPOPTSTRIP.h | |||
@@ -1,13 +1,15 @@ | |||
1 | #ifndef _XT_TCPOPTSTRIP_H | 1 | #ifndef _XT_TCPOPTSTRIP_H |
2 | #define _XT_TCPOPTSTRIP_H | 2 | #define _XT_TCPOPTSTRIP_H |
3 | 3 | ||
4 | #include <linux/types.h> | ||
5 | |||
4 | #define tcpoptstrip_set_bit(bmap, idx) \ | 6 | #define tcpoptstrip_set_bit(bmap, idx) \ |
5 | (bmap[(idx) >> 5] |= 1U << (idx & 31)) | 7 | (bmap[(idx) >> 5] |= 1U << (idx & 31)) |
6 | #define tcpoptstrip_test_bit(bmap, idx) \ | 8 | #define tcpoptstrip_test_bit(bmap, idx) \ |
7 | (((1U << (idx & 31)) & bmap[(idx) >> 5]) != 0) | 9 | (((1U << (idx & 31)) & bmap[(idx) >> 5]) != 0) |
8 | 10 | ||
9 | struct xt_tcpoptstrip_target_info { | 11 | struct xt_tcpoptstrip_target_info { |
10 | u_int32_t strip_bmap[8]; | 12 | __u32 strip_bmap[8]; |
11 | }; | 13 | }; |
12 | 14 | ||
13 | #endif /* _XT_TCPOPTSTRIP_H */ | 15 | #endif /* _XT_TCPOPTSTRIP_H */ |
diff --git a/include/linux/netfilter/xt_TPROXY.h b/include/linux/netfilter/xt_TPROXY.h index 3f3d69361289..902043c2073f 100644 --- a/include/linux/netfilter/xt_TPROXY.h +++ b/include/linux/netfilter/xt_TPROXY.h | |||
@@ -1,19 +1,21 @@ | |||
1 | #ifndef _XT_TPROXY_H | 1 | #ifndef _XT_TPROXY_H |
2 | #define _XT_TPROXY_H | 2 | #define _XT_TPROXY_H |
3 | 3 | ||
4 | #include <linux/types.h> | ||
5 | |||
4 | /* TPROXY target is capable of marking the packet to perform | 6 | /* TPROXY target is capable of marking the packet to perform |
5 | * redirection. We can get rid of that whenever we get support for | 7 | * redirection. We can get rid of that whenever we get support for |
6 | * mutliple targets in the same rule. */ | 8 | * mutliple targets in the same rule. */ |
7 | struct xt_tproxy_target_info { | 9 | struct xt_tproxy_target_info { |
8 | u_int32_t mark_mask; | 10 | __u32 mark_mask; |
9 | u_int32_t mark_value; | 11 | __u32 mark_value; |
10 | __be32 laddr; | 12 | __be32 laddr; |
11 | __be16 lport; | 13 | __be16 lport; |
12 | }; | 14 | }; |
13 | 15 | ||
14 | struct xt_tproxy_target_info_v1 { | 16 | struct xt_tproxy_target_info_v1 { |
15 | u_int32_t mark_mask; | 17 | __u32 mark_mask; |
16 | u_int32_t mark_value; | 18 | __u32 mark_value; |
17 | union nf_inet_addr laddr; | 19 | union nf_inet_addr laddr; |
18 | __be16 lport; | 20 | __be16 lport; |
19 | }; | 21 | }; |
diff --git a/include/linux/netfilter/xt_cluster.h b/include/linux/netfilter/xt_cluster.h index 886682656f09..9b883c8fbf54 100644 --- a/include/linux/netfilter/xt_cluster.h +++ b/include/linux/netfilter/xt_cluster.h | |||
@@ -1,15 +1,17 @@ | |||
1 | #ifndef _XT_CLUSTER_MATCH_H | 1 | #ifndef _XT_CLUSTER_MATCH_H |
2 | #define _XT_CLUSTER_MATCH_H | 2 | #define _XT_CLUSTER_MATCH_H |
3 | 3 | ||
4 | #include <linux/types.h> | ||
5 | |||
4 | enum xt_cluster_flags { | 6 | enum xt_cluster_flags { |
5 | XT_CLUSTER_F_INV = (1 << 0) | 7 | XT_CLUSTER_F_INV = (1 << 0) |
6 | }; | 8 | }; |
7 | 9 | ||
8 | struct xt_cluster_match_info { | 10 | struct xt_cluster_match_info { |
9 | u_int32_t total_nodes; | 11 | __u32 total_nodes; |
10 | u_int32_t node_mask; | 12 | __u32 node_mask; |
11 | u_int32_t hash_seed; | 13 | __u32 hash_seed; |
12 | u_int32_t flags; | 14 | __u32 flags; |
13 | }; | 15 | }; |
14 | 16 | ||
15 | #define XT_CLUSTER_NODES_MAX 32 | 17 | #define XT_CLUSTER_NODES_MAX 32 |
diff --git a/include/linux/netfilter/xt_comment.h b/include/linux/netfilter/xt_comment.h index eacfedc6b5d0..0ea5e79f5bd7 100644 --- a/include/linux/netfilter/xt_comment.h +++ b/include/linux/netfilter/xt_comment.h | |||
@@ -4,7 +4,7 @@ | |||
4 | #define XT_MAX_COMMENT_LEN 256 | 4 | #define XT_MAX_COMMENT_LEN 256 |
5 | 5 | ||
6 | struct xt_comment_info { | 6 | struct xt_comment_info { |
7 | unsigned char comment[XT_MAX_COMMENT_LEN]; | 7 | char comment[XT_MAX_COMMENT_LEN]; |
8 | }; | 8 | }; |
9 | 9 | ||
10 | #endif /* XT_COMMENT_H */ | 10 | #endif /* XT_COMMENT_H */ |
diff --git a/include/linux/netfilter/xt_connlimit.h b/include/linux/netfilter/xt_connlimit.h index 7e3284bcbd2b..0ca66e97acbc 100644 --- a/include/linux/netfilter/xt_connlimit.h +++ b/include/linux/netfilter/xt_connlimit.h | |||
@@ -1,8 +1,15 @@ | |||
1 | #ifndef _XT_CONNLIMIT_H | 1 | #ifndef _XT_CONNLIMIT_H |
2 | #define _XT_CONNLIMIT_H | 2 | #define _XT_CONNLIMIT_H |
3 | 3 | ||
4 | #include <linux/types.h> | ||
5 | |||
4 | struct xt_connlimit_data; | 6 | struct xt_connlimit_data; |
5 | 7 | ||
8 | enum { | ||
9 | XT_CONNLIMIT_INVERT = 1 << 0, | ||
10 | XT_CONNLIMIT_DADDR = 1 << 1, | ||
11 | }; | ||
12 | |||
6 | struct xt_connlimit_info { | 13 | struct xt_connlimit_info { |
7 | union { | 14 | union { |
8 | union nf_inet_addr mask; | 15 | union nf_inet_addr mask; |
@@ -13,7 +20,14 @@ struct xt_connlimit_info { | |||
13 | }; | 20 | }; |
14 | #endif | 21 | #endif |
15 | }; | 22 | }; |
16 | unsigned int limit, inverse; | 23 | unsigned int limit; |
24 | union { | ||
25 | /* revision 0 */ | ||
26 | unsigned int inverse; | ||
27 | |||
28 | /* revision 1 */ | ||
29 | __u32 flags; | ||
30 | }; | ||
17 | 31 | ||
18 | /* Used internally by the kernel */ | 32 | /* Used internally by the kernel */ |
19 | struct xt_connlimit_data *data __attribute__((aligned(8))); | 33 | struct xt_connlimit_data *data __attribute__((aligned(8))); |
diff --git a/include/linux/netfilter/xt_conntrack.h b/include/linux/netfilter/xt_conntrack.h index 54f47a2f6152..74b904d8f99c 100644 --- a/include/linux/netfilter/xt_conntrack.h +++ b/include/linux/netfilter/xt_conntrack.h | |||
@@ -58,4 +58,19 @@ struct xt_conntrack_mtinfo2 { | |||
58 | __u16 state_mask, status_mask; | 58 | __u16 state_mask, status_mask; |
59 | }; | 59 | }; |
60 | 60 | ||
61 | struct xt_conntrack_mtinfo3 { | ||
62 | union nf_inet_addr origsrc_addr, origsrc_mask; | ||
63 | union nf_inet_addr origdst_addr, origdst_mask; | ||
64 | union nf_inet_addr replsrc_addr, replsrc_mask; | ||
65 | union nf_inet_addr repldst_addr, repldst_mask; | ||
66 | __u32 expires_min, expires_max; | ||
67 | __u16 l4proto; | ||
68 | __u16 origsrc_port, origdst_port; | ||
69 | __u16 replsrc_port, repldst_port; | ||
70 | __u16 match_flags, invert_flags; | ||
71 | __u16 state_mask, status_mask; | ||
72 | __u16 origsrc_port_high, origdst_port_high; | ||
73 | __u16 replsrc_port_high, repldst_port_high; | ||
74 | }; | ||
75 | |||
61 | #endif /*_XT_CONNTRACK_H*/ | 76 | #endif /*_XT_CONNTRACK_H*/ |
diff --git a/include/linux/netfilter/xt_quota.h b/include/linux/netfilter/xt_quota.h index b0d28c659ab7..ca6e03e47a17 100644 --- a/include/linux/netfilter/xt_quota.h +++ b/include/linux/netfilter/xt_quota.h | |||
@@ -1,6 +1,8 @@ | |||
1 | #ifndef _XT_QUOTA_H | 1 | #ifndef _XT_QUOTA_H |
2 | #define _XT_QUOTA_H | 2 | #define _XT_QUOTA_H |
3 | 3 | ||
4 | #include <linux/types.h> | ||
5 | |||
4 | enum xt_quota_flags { | 6 | enum xt_quota_flags { |
5 | XT_QUOTA_INVERT = 0x1, | 7 | XT_QUOTA_INVERT = 0x1, |
6 | }; | 8 | }; |
@@ -9,9 +11,9 @@ enum xt_quota_flags { | |||
9 | struct xt_quota_priv; | 11 | struct xt_quota_priv; |
10 | 12 | ||
11 | struct xt_quota_info { | 13 | struct xt_quota_info { |
12 | u_int32_t flags; | 14 | __u32 flags; |
13 | u_int32_t pad; | 15 | __u32 pad; |
14 | aligned_u64 quota; | 16 | aligned_u64 quota; |
15 | 17 | ||
16 | /* Used internally by the kernel */ | 18 | /* Used internally by the kernel */ |
17 | struct xt_quota_priv *master; | 19 | struct xt_quota_priv *master; |
diff --git a/include/linux/netfilter/xt_socket.h b/include/linux/netfilter/xt_socket.h index 6f475b8ff34b..26d7217bd4f1 100644 --- a/include/linux/netfilter/xt_socket.h +++ b/include/linux/netfilter/xt_socket.h | |||
@@ -1,6 +1,8 @@ | |||
1 | #ifndef _XT_SOCKET_H | 1 | #ifndef _XT_SOCKET_H |
2 | #define _XT_SOCKET_H | 2 | #define _XT_SOCKET_H |
3 | 3 | ||
4 | #include <linux/types.h> | ||
5 | |||
4 | enum { | 6 | enum { |
5 | XT_SOCKET_TRANSPARENT = 1 << 0, | 7 | XT_SOCKET_TRANSPARENT = 1 << 0, |
6 | }; | 8 | }; |
diff --git a/include/linux/netfilter/xt_time.h b/include/linux/netfilter/xt_time.h index 14b6df412c9f..7c37fac576c4 100644 --- a/include/linux/netfilter/xt_time.h +++ b/include/linux/netfilter/xt_time.h | |||
@@ -1,14 +1,16 @@ | |||
1 | #ifndef _XT_TIME_H | 1 | #ifndef _XT_TIME_H |
2 | #define _XT_TIME_H 1 | 2 | #define _XT_TIME_H 1 |
3 | 3 | ||
4 | #include <linux/types.h> | ||
5 | |||
4 | struct xt_time_info { | 6 | struct xt_time_info { |
5 | u_int32_t date_start; | 7 | __u32 date_start; |
6 | u_int32_t date_stop; | 8 | __u32 date_stop; |
7 | u_int32_t daytime_start; | 9 | __u32 daytime_start; |
8 | u_int32_t daytime_stop; | 10 | __u32 daytime_stop; |
9 | u_int32_t monthdays_match; | 11 | __u32 monthdays_match; |
10 | u_int8_t weekdays_match; | 12 | __u8 weekdays_match; |
11 | u_int8_t flags; | 13 | __u8 flags; |
12 | }; | 14 | }; |
13 | 15 | ||
14 | enum { | 16 | enum { |
diff --git a/include/linux/netfilter/xt_u32.h b/include/linux/netfilter/xt_u32.h index 9947f56cdbdd..04d1bfea03c2 100644 --- a/include/linux/netfilter/xt_u32.h +++ b/include/linux/netfilter/xt_u32.h | |||
@@ -1,6 +1,8 @@ | |||
1 | #ifndef _XT_U32_H | 1 | #ifndef _XT_U32_H |
2 | #define _XT_U32_H 1 | 2 | #define _XT_U32_H 1 |
3 | 3 | ||
4 | #include <linux/types.h> | ||
5 | |||
4 | enum xt_u32_ops { | 6 | enum xt_u32_ops { |
5 | XT_U32_AND, | 7 | XT_U32_AND, |
6 | XT_U32_LEFTSH, | 8 | XT_U32_LEFTSH, |
@@ -9,13 +11,13 @@ enum xt_u32_ops { | |||
9 | }; | 11 | }; |
10 | 12 | ||
11 | struct xt_u32_location_element { | 13 | struct xt_u32_location_element { |
12 | u_int32_t number; | 14 | __u32 number; |
13 | u_int8_t nextop; | 15 | __u8 nextop; |
14 | }; | 16 | }; |
15 | 17 | ||
16 | struct xt_u32_value_element { | 18 | struct xt_u32_value_element { |
17 | u_int32_t min; | 19 | __u32 min; |
18 | u_int32_t max; | 20 | __u32 max; |
19 | }; | 21 | }; |
20 | 22 | ||
21 | /* | 23 | /* |
@@ -27,14 +29,14 @@ struct xt_u32_value_element { | |||
27 | struct xt_u32_test { | 29 | struct xt_u32_test { |
28 | struct xt_u32_location_element location[XT_U32_MAXSIZE+1]; | 30 | struct xt_u32_location_element location[XT_U32_MAXSIZE+1]; |
29 | struct xt_u32_value_element value[XT_U32_MAXSIZE+1]; | 31 | struct xt_u32_value_element value[XT_U32_MAXSIZE+1]; |
30 | u_int8_t nnums; | 32 | __u8 nnums; |
31 | u_int8_t nvalues; | 33 | __u8 nvalues; |
32 | }; | 34 | }; |
33 | 35 | ||
34 | struct xt_u32 { | 36 | struct xt_u32 { |
35 | struct xt_u32_test tests[XT_U32_MAXSIZE+1]; | 37 | struct xt_u32_test tests[XT_U32_MAXSIZE+1]; |
36 | u_int8_t ntests; | 38 | __u8 ntests; |
37 | u_int8_t invert; | 39 | __u8 invert; |
38 | }; | 40 | }; |
39 | 41 | ||
40 | #endif /* _XT_U32_H */ | 42 | #endif /* _XT_U32_H */ |
diff --git a/include/linux/netfilter_bridge/ebt_802_3.h b/include/linux/netfilter_bridge/ebt_802_3.h index c73ef0b18bdc..be5be1577a56 100644 --- a/include/linux/netfilter_bridge/ebt_802_3.h +++ b/include/linux/netfilter_bridge/ebt_802_3.h | |||
@@ -1,6 +1,8 @@ | |||
1 | #ifndef __LINUX_BRIDGE_EBT_802_3_H | 1 | #ifndef __LINUX_BRIDGE_EBT_802_3_H |
2 | #define __LINUX_BRIDGE_EBT_802_3_H | 2 | #define __LINUX_BRIDGE_EBT_802_3_H |
3 | 3 | ||
4 | #include <linux/types.h> | ||
5 | |||
4 | #define EBT_802_3_SAP 0x01 | 6 | #define EBT_802_3_SAP 0x01 |
5 | #define EBT_802_3_TYPE 0x02 | 7 | #define EBT_802_3_TYPE 0x02 |
6 | 8 | ||
@@ -24,24 +26,24 @@ | |||
24 | 26 | ||
25 | /* ui has one byte ctrl, ni has two */ | 27 | /* ui has one byte ctrl, ni has two */ |
26 | struct hdr_ui { | 28 | struct hdr_ui { |
27 | uint8_t dsap; | 29 | __u8 dsap; |
28 | uint8_t ssap; | 30 | __u8 ssap; |
29 | uint8_t ctrl; | 31 | __u8 ctrl; |
30 | uint8_t orig[3]; | 32 | __u8 orig[3]; |
31 | __be16 type; | 33 | __be16 type; |
32 | }; | 34 | }; |
33 | 35 | ||
34 | struct hdr_ni { | 36 | struct hdr_ni { |
35 | uint8_t dsap; | 37 | __u8 dsap; |
36 | uint8_t ssap; | 38 | __u8 ssap; |
37 | __be16 ctrl; | 39 | __be16 ctrl; |
38 | uint8_t orig[3]; | 40 | __u8 orig[3]; |
39 | __be16 type; | 41 | __be16 type; |
40 | }; | 42 | }; |
41 | 43 | ||
42 | struct ebt_802_3_hdr { | 44 | struct ebt_802_3_hdr { |
43 | uint8_t daddr[6]; | 45 | __u8 daddr[6]; |
44 | uint8_t saddr[6]; | 46 | __u8 saddr[6]; |
45 | __be16 len; | 47 | __be16 len; |
46 | union { | 48 | union { |
47 | struct hdr_ui ui; | 49 | struct hdr_ui ui; |
@@ -59,10 +61,10 @@ static inline struct ebt_802_3_hdr *ebt_802_3_hdr(const struct sk_buff *skb) | |||
59 | #endif | 61 | #endif |
60 | 62 | ||
61 | struct ebt_802_3_info { | 63 | struct ebt_802_3_info { |
62 | uint8_t sap; | 64 | __u8 sap; |
63 | __be16 type; | 65 | __be16 type; |
64 | uint8_t bitmask; | 66 | __u8 bitmask; |
65 | uint8_t invflags; | 67 | __u8 invflags; |
66 | }; | 68 | }; |
67 | 69 | ||
68 | #endif | 70 | #endif |
diff --git a/include/linux/netfilter_bridge/ebt_among.h b/include/linux/netfilter_bridge/ebt_among.h index 0009558609a7..bd4e3ad0b706 100644 --- a/include/linux/netfilter_bridge/ebt_among.h +++ b/include/linux/netfilter_bridge/ebt_among.h | |||
@@ -1,6 +1,8 @@ | |||
1 | #ifndef __LINUX_BRIDGE_EBT_AMONG_H | 1 | #ifndef __LINUX_BRIDGE_EBT_AMONG_H |
2 | #define __LINUX_BRIDGE_EBT_AMONG_H | 2 | #define __LINUX_BRIDGE_EBT_AMONG_H |
3 | 3 | ||
4 | #include <linux/types.h> | ||
5 | |||
4 | #define EBT_AMONG_DST 0x01 | 6 | #define EBT_AMONG_DST 0x01 |
5 | #define EBT_AMONG_SRC 0x02 | 7 | #define EBT_AMONG_SRC 0x02 |
6 | 8 | ||
@@ -30,7 +32,7 @@ | |||
30 | */ | 32 | */ |
31 | 33 | ||
32 | struct ebt_mac_wormhash_tuple { | 34 | struct ebt_mac_wormhash_tuple { |
33 | uint32_t cmp[2]; | 35 | __u32 cmp[2]; |
34 | __be32 ip; | 36 | __be32 ip; |
35 | }; | 37 | }; |
36 | 38 | ||
diff --git a/include/linux/netfilter_bridge/ebt_arp.h b/include/linux/netfilter_bridge/ebt_arp.h index cbf4843b6b0f..522f3e427f49 100644 --- a/include/linux/netfilter_bridge/ebt_arp.h +++ b/include/linux/netfilter_bridge/ebt_arp.h | |||
@@ -1,6 +1,8 @@ | |||
1 | #ifndef __LINUX_BRIDGE_EBT_ARP_H | 1 | #ifndef __LINUX_BRIDGE_EBT_ARP_H |
2 | #define __LINUX_BRIDGE_EBT_ARP_H | 2 | #define __LINUX_BRIDGE_EBT_ARP_H |
3 | 3 | ||
4 | #include <linux/types.h> | ||
5 | |||
4 | #define EBT_ARP_OPCODE 0x01 | 6 | #define EBT_ARP_OPCODE 0x01 |
5 | #define EBT_ARP_HTYPE 0x02 | 7 | #define EBT_ARP_HTYPE 0x02 |
6 | #define EBT_ARP_PTYPE 0x04 | 8 | #define EBT_ARP_PTYPE 0x04 |
@@ -27,8 +29,8 @@ struct ebt_arp_info | |||
27 | unsigned char smmsk[ETH_ALEN]; | 29 | unsigned char smmsk[ETH_ALEN]; |
28 | unsigned char dmaddr[ETH_ALEN]; | 30 | unsigned char dmaddr[ETH_ALEN]; |
29 | unsigned char dmmsk[ETH_ALEN]; | 31 | unsigned char dmmsk[ETH_ALEN]; |
30 | uint8_t bitmask; | 32 | __u8 bitmask; |
31 | uint8_t invflags; | 33 | __u8 invflags; |
32 | }; | 34 | }; |
33 | 35 | ||
34 | #endif | 36 | #endif |
diff --git a/include/linux/netfilter_bridge/ebt_ip.h b/include/linux/netfilter_bridge/ebt_ip.h index 6a708fb92241..c4bbc41b0ea4 100644 --- a/include/linux/netfilter_bridge/ebt_ip.h +++ b/include/linux/netfilter_bridge/ebt_ip.h | |||
@@ -15,6 +15,8 @@ | |||
15 | #ifndef __LINUX_BRIDGE_EBT_IP_H | 15 | #ifndef __LINUX_BRIDGE_EBT_IP_H |
16 | #define __LINUX_BRIDGE_EBT_IP_H | 16 | #define __LINUX_BRIDGE_EBT_IP_H |
17 | 17 | ||
18 | #include <linux/types.h> | ||
19 | |||
18 | #define EBT_IP_SOURCE 0x01 | 20 | #define EBT_IP_SOURCE 0x01 |
19 | #define EBT_IP_DEST 0x02 | 21 | #define EBT_IP_DEST 0x02 |
20 | #define EBT_IP_TOS 0x04 | 22 | #define EBT_IP_TOS 0x04 |
@@ -31,12 +33,12 @@ struct ebt_ip_info { | |||
31 | __be32 daddr; | 33 | __be32 daddr; |
32 | __be32 smsk; | 34 | __be32 smsk; |
33 | __be32 dmsk; | 35 | __be32 dmsk; |
34 | uint8_t tos; | 36 | __u8 tos; |
35 | uint8_t protocol; | 37 | __u8 protocol; |
36 | uint8_t bitmask; | 38 | __u8 bitmask; |
37 | uint8_t invflags; | 39 | __u8 invflags; |
38 | uint16_t sport[2]; | 40 | __u16 sport[2]; |
39 | uint16_t dport[2]; | 41 | __u16 dport[2]; |
40 | }; | 42 | }; |
41 | 43 | ||
42 | #endif | 44 | #endif |
diff --git a/include/linux/netfilter_bridge/ebt_ip6.h b/include/linux/netfilter_bridge/ebt_ip6.h index e5de98701519..42b889682721 100644 --- a/include/linux/netfilter_bridge/ebt_ip6.h +++ b/include/linux/netfilter_bridge/ebt_ip6.h | |||
@@ -12,14 +12,19 @@ | |||
12 | #ifndef __LINUX_BRIDGE_EBT_IP6_H | 12 | #ifndef __LINUX_BRIDGE_EBT_IP6_H |
13 | #define __LINUX_BRIDGE_EBT_IP6_H | 13 | #define __LINUX_BRIDGE_EBT_IP6_H |
14 | 14 | ||
15 | #include <linux/types.h> | ||
16 | |||
15 | #define EBT_IP6_SOURCE 0x01 | 17 | #define EBT_IP6_SOURCE 0x01 |
16 | #define EBT_IP6_DEST 0x02 | 18 | #define EBT_IP6_DEST 0x02 |
17 | #define EBT_IP6_TCLASS 0x04 | 19 | #define EBT_IP6_TCLASS 0x04 |
18 | #define EBT_IP6_PROTO 0x08 | 20 | #define EBT_IP6_PROTO 0x08 |
19 | #define EBT_IP6_SPORT 0x10 | 21 | #define EBT_IP6_SPORT 0x10 |
20 | #define EBT_IP6_DPORT 0x20 | 22 | #define EBT_IP6_DPORT 0x20 |
23 | #define EBT_IP6_ICMP6 0x40 | ||
24 | |||
21 | #define EBT_IP6_MASK (EBT_IP6_SOURCE | EBT_IP6_DEST | EBT_IP6_TCLASS |\ | 25 | #define EBT_IP6_MASK (EBT_IP6_SOURCE | EBT_IP6_DEST | EBT_IP6_TCLASS |\ |
22 | EBT_IP6_PROTO | EBT_IP6_SPORT | EBT_IP6_DPORT) | 26 | EBT_IP6_PROTO | EBT_IP6_SPORT | EBT_IP6_DPORT | \ |
27 | EBT_IP6_ICMP6) | ||
23 | #define EBT_IP6_MATCH "ip6" | 28 | #define EBT_IP6_MATCH "ip6" |
24 | 29 | ||
25 | /* the same values are used for the invflags */ | 30 | /* the same values are used for the invflags */ |
@@ -28,12 +33,18 @@ struct ebt_ip6_info { | |||
28 | struct in6_addr daddr; | 33 | struct in6_addr daddr; |
29 | struct in6_addr smsk; | 34 | struct in6_addr smsk; |
30 | struct in6_addr dmsk; | 35 | struct in6_addr dmsk; |
31 | uint8_t tclass; | 36 | __u8 tclass; |
32 | uint8_t protocol; | 37 | __u8 protocol; |
33 | uint8_t bitmask; | 38 | __u8 bitmask; |
34 | uint8_t invflags; | 39 | __u8 invflags; |
35 | uint16_t sport[2]; | 40 | union { |
36 | uint16_t dport[2]; | 41 | __u16 sport[2]; |
42 | __u8 icmpv6_type[2]; | ||
43 | }; | ||
44 | union { | ||
45 | __u16 dport[2]; | ||
46 | __u8 icmpv6_code[2]; | ||
47 | }; | ||
37 | }; | 48 | }; |
38 | 49 | ||
39 | #endif | 50 | #endif |
diff --git a/include/linux/netfilter_bridge/ebt_limit.h b/include/linux/netfilter_bridge/ebt_limit.h index 4bf76b751676..66d80b30ba0e 100644 --- a/include/linux/netfilter_bridge/ebt_limit.h +++ b/include/linux/netfilter_bridge/ebt_limit.h | |||
@@ -1,6 +1,8 @@ | |||
1 | #ifndef __LINUX_BRIDGE_EBT_LIMIT_H | 1 | #ifndef __LINUX_BRIDGE_EBT_LIMIT_H |
2 | #define __LINUX_BRIDGE_EBT_LIMIT_H | 2 | #define __LINUX_BRIDGE_EBT_LIMIT_H |
3 | 3 | ||
4 | #include <linux/types.h> | ||
5 | |||
4 | #define EBT_LIMIT_MATCH "limit" | 6 | #define EBT_LIMIT_MATCH "limit" |
5 | 7 | ||
6 | /* timings are in milliseconds. */ | 8 | /* timings are in milliseconds. */ |
@@ -10,13 +12,13 @@ | |||
10 | seconds, or one every 59 hours. */ | 12 | seconds, or one every 59 hours. */ |
11 | 13 | ||
12 | struct ebt_limit_info { | 14 | struct ebt_limit_info { |
13 | u_int32_t avg; /* Average secs between packets * scale */ | 15 | __u32 avg; /* Average secs between packets * scale */ |
14 | u_int32_t burst; /* Period multiplier for upper limit. */ | 16 | __u32 burst; /* Period multiplier for upper limit. */ |
15 | 17 | ||
16 | /* Used internally by the kernel */ | 18 | /* Used internally by the kernel */ |
17 | unsigned long prev; | 19 | unsigned long prev; |
18 | u_int32_t credit; | 20 | __u32 credit; |
19 | u_int32_t credit_cap, cost; | 21 | __u32 credit_cap, cost; |
20 | }; | 22 | }; |
21 | 23 | ||
22 | #endif | 24 | #endif |
diff --git a/include/linux/netfilter_bridge/ebt_log.h b/include/linux/netfilter_bridge/ebt_log.h index cc2cdfb764bc..7e7f1d1fe494 100644 --- a/include/linux/netfilter_bridge/ebt_log.h +++ b/include/linux/netfilter_bridge/ebt_log.h | |||
@@ -1,6 +1,8 @@ | |||
1 | #ifndef __LINUX_BRIDGE_EBT_LOG_H | 1 | #ifndef __LINUX_BRIDGE_EBT_LOG_H |
2 | #define __LINUX_BRIDGE_EBT_LOG_H | 2 | #define __LINUX_BRIDGE_EBT_LOG_H |
3 | 3 | ||
4 | #include <linux/types.h> | ||
5 | |||
4 | #define EBT_LOG_IP 0x01 /* if the frame is made by ip, log the ip information */ | 6 | #define EBT_LOG_IP 0x01 /* if the frame is made by ip, log the ip information */ |
5 | #define EBT_LOG_ARP 0x02 | 7 | #define EBT_LOG_ARP 0x02 |
6 | #define EBT_LOG_NFLOG 0x04 | 8 | #define EBT_LOG_NFLOG 0x04 |
@@ -10,9 +12,9 @@ | |||
10 | #define EBT_LOG_WATCHER "log" | 12 | #define EBT_LOG_WATCHER "log" |
11 | 13 | ||
12 | struct ebt_log_info { | 14 | struct ebt_log_info { |
13 | uint8_t loglevel; | 15 | __u8 loglevel; |
14 | uint8_t prefix[EBT_LOG_PREFIX_SIZE]; | 16 | __u8 prefix[EBT_LOG_PREFIX_SIZE]; |
15 | uint32_t bitmask; | 17 | __u32 bitmask; |
16 | }; | 18 | }; |
17 | 19 | ||
18 | #endif | 20 | #endif |
diff --git a/include/linux/netfilter_bridge/ebt_mark_m.h b/include/linux/netfilter_bridge/ebt_mark_m.h index 9ceb10ec0ed6..410f9e5a71d4 100644 --- a/include/linux/netfilter_bridge/ebt_mark_m.h +++ b/include/linux/netfilter_bridge/ebt_mark_m.h | |||
@@ -1,13 +1,15 @@ | |||
1 | #ifndef __LINUX_BRIDGE_EBT_MARK_M_H | 1 | #ifndef __LINUX_BRIDGE_EBT_MARK_M_H |
2 | #define __LINUX_BRIDGE_EBT_MARK_M_H | 2 | #define __LINUX_BRIDGE_EBT_MARK_M_H |
3 | 3 | ||
4 | #include <linux/types.h> | ||
5 | |||
4 | #define EBT_MARK_AND 0x01 | 6 | #define EBT_MARK_AND 0x01 |
5 | #define EBT_MARK_OR 0x02 | 7 | #define EBT_MARK_OR 0x02 |
6 | #define EBT_MARK_MASK (EBT_MARK_AND | EBT_MARK_OR) | 8 | #define EBT_MARK_MASK (EBT_MARK_AND | EBT_MARK_OR) |
7 | struct ebt_mark_m_info { | 9 | struct ebt_mark_m_info { |
8 | unsigned long mark, mask; | 10 | unsigned long mark, mask; |
9 | uint8_t invert; | 11 | __u8 invert; |
10 | uint8_t bitmask; | 12 | __u8 bitmask; |
11 | }; | 13 | }; |
12 | #define EBT_MARK_MATCH "mark_m" | 14 | #define EBT_MARK_MATCH "mark_m" |
13 | 15 | ||
diff --git a/include/linux/netfilter_bridge/ebt_nflog.h b/include/linux/netfilter_bridge/ebt_nflog.h index 052817849b83..df829fce9125 100644 --- a/include/linux/netfilter_bridge/ebt_nflog.h +++ b/include/linux/netfilter_bridge/ebt_nflog.h | |||
@@ -1,6 +1,8 @@ | |||
1 | #ifndef __LINUX_BRIDGE_EBT_NFLOG_H | 1 | #ifndef __LINUX_BRIDGE_EBT_NFLOG_H |
2 | #define __LINUX_BRIDGE_EBT_NFLOG_H | 2 | #define __LINUX_BRIDGE_EBT_NFLOG_H |
3 | 3 | ||
4 | #include <linux/types.h> | ||
5 | |||
4 | #define EBT_NFLOG_MASK 0x0 | 6 | #define EBT_NFLOG_MASK 0x0 |
5 | 7 | ||
6 | #define EBT_NFLOG_PREFIX_SIZE 64 | 8 | #define EBT_NFLOG_PREFIX_SIZE 64 |
@@ -10,11 +12,11 @@ | |||
10 | #define EBT_NFLOG_DEFAULT_THRESHOLD 1 | 12 | #define EBT_NFLOG_DEFAULT_THRESHOLD 1 |
11 | 13 | ||
12 | struct ebt_nflog_info { | 14 | struct ebt_nflog_info { |
13 | u_int32_t len; | 15 | __u32 len; |
14 | u_int16_t group; | 16 | __u16 group; |
15 | u_int16_t threshold; | 17 | __u16 threshold; |
16 | u_int16_t flags; | 18 | __u16 flags; |
17 | u_int16_t pad; | 19 | __u16 pad; |
18 | char prefix[EBT_NFLOG_PREFIX_SIZE]; | 20 | char prefix[EBT_NFLOG_PREFIX_SIZE]; |
19 | }; | 21 | }; |
20 | 22 | ||
diff --git a/include/linux/netfilter_bridge/ebt_pkttype.h b/include/linux/netfilter_bridge/ebt_pkttype.h index 51a799840931..c241badcd036 100644 --- a/include/linux/netfilter_bridge/ebt_pkttype.h +++ b/include/linux/netfilter_bridge/ebt_pkttype.h | |||
@@ -1,9 +1,11 @@ | |||
1 | #ifndef __LINUX_BRIDGE_EBT_PKTTYPE_H | 1 | #ifndef __LINUX_BRIDGE_EBT_PKTTYPE_H |
2 | #define __LINUX_BRIDGE_EBT_PKTTYPE_H | 2 | #define __LINUX_BRIDGE_EBT_PKTTYPE_H |
3 | 3 | ||
4 | #include <linux/types.h> | ||
5 | |||
4 | struct ebt_pkttype_info { | 6 | struct ebt_pkttype_info { |
5 | uint8_t pkt_type; | 7 | __u8 pkt_type; |
6 | uint8_t invert; | 8 | __u8 invert; |
7 | }; | 9 | }; |
8 | #define EBT_PKTTYPE_MATCH "pkttype" | 10 | #define EBT_PKTTYPE_MATCH "pkttype" |
9 | 11 | ||
diff --git a/include/linux/netfilter_bridge/ebt_stp.h b/include/linux/netfilter_bridge/ebt_stp.h index e503a0aa2728..1025b9f5fb7d 100644 --- a/include/linux/netfilter_bridge/ebt_stp.h +++ b/include/linux/netfilter_bridge/ebt_stp.h | |||
@@ -1,6 +1,8 @@ | |||
1 | #ifndef __LINUX_BRIDGE_EBT_STP_H | 1 | #ifndef __LINUX_BRIDGE_EBT_STP_H |
2 | #define __LINUX_BRIDGE_EBT_STP_H | 2 | #define __LINUX_BRIDGE_EBT_STP_H |
3 | 3 | ||
4 | #include <linux/types.h> | ||
5 | |||
4 | #define EBT_STP_TYPE 0x0001 | 6 | #define EBT_STP_TYPE 0x0001 |
5 | 7 | ||
6 | #define EBT_STP_FLAGS 0x0002 | 8 | #define EBT_STP_FLAGS 0x0002 |
@@ -21,24 +23,24 @@ | |||
21 | #define EBT_STP_MATCH "stp" | 23 | #define EBT_STP_MATCH "stp" |
22 | 24 | ||
23 | struct ebt_stp_config_info { | 25 | struct ebt_stp_config_info { |
24 | uint8_t flags; | 26 | __u8 flags; |
25 | uint16_t root_priol, root_priou; | 27 | __u16 root_priol, root_priou; |
26 | char root_addr[6], root_addrmsk[6]; | 28 | char root_addr[6], root_addrmsk[6]; |
27 | uint32_t root_costl, root_costu; | 29 | __u32 root_costl, root_costu; |
28 | uint16_t sender_priol, sender_priou; | 30 | __u16 sender_priol, sender_priou; |
29 | char sender_addr[6], sender_addrmsk[6]; | 31 | char sender_addr[6], sender_addrmsk[6]; |
30 | uint16_t portl, portu; | 32 | __u16 portl, portu; |
31 | uint16_t msg_agel, msg_ageu; | 33 | __u16 msg_agel, msg_ageu; |
32 | uint16_t max_agel, max_ageu; | 34 | __u16 max_agel, max_ageu; |
33 | uint16_t hello_timel, hello_timeu; | 35 | __u16 hello_timel, hello_timeu; |
34 | uint16_t forward_delayl, forward_delayu; | 36 | __u16 forward_delayl, forward_delayu; |
35 | }; | 37 | }; |
36 | 38 | ||
37 | struct ebt_stp_info { | 39 | struct ebt_stp_info { |
38 | uint8_t type; | 40 | __u8 type; |
39 | struct ebt_stp_config_info config; | 41 | struct ebt_stp_config_info config; |
40 | uint16_t bitmask; | 42 | __u16 bitmask; |
41 | uint16_t invflags; | 43 | __u16 invflags; |
42 | }; | 44 | }; |
43 | 45 | ||
44 | #endif | 46 | #endif |
diff --git a/include/linux/netfilter_bridge/ebt_ulog.h b/include/linux/netfilter_bridge/ebt_ulog.h index b677e2671541..89a6becb5269 100644 --- a/include/linux/netfilter_bridge/ebt_ulog.h +++ b/include/linux/netfilter_bridge/ebt_ulog.h | |||
@@ -1,6 +1,8 @@ | |||
1 | #ifndef _EBT_ULOG_H | 1 | #ifndef _EBT_ULOG_H |
2 | #define _EBT_ULOG_H | 2 | #define _EBT_ULOG_H |
3 | 3 | ||
4 | #include <linux/types.h> | ||
5 | |||
4 | #define EBT_ULOG_DEFAULT_NLGROUP 0 | 6 | #define EBT_ULOG_DEFAULT_NLGROUP 0 |
5 | #define EBT_ULOG_DEFAULT_QTHRESHOLD 1 | 7 | #define EBT_ULOG_DEFAULT_QTHRESHOLD 1 |
6 | #define EBT_ULOG_MAXNLGROUPS 32 /* hardcoded netlink max */ | 8 | #define EBT_ULOG_MAXNLGROUPS 32 /* hardcoded netlink max */ |
@@ -10,7 +12,7 @@ | |||
10 | #define EBT_ULOG_VERSION 1 | 12 | #define EBT_ULOG_VERSION 1 |
11 | 13 | ||
12 | struct ebt_ulog_info { | 14 | struct ebt_ulog_info { |
13 | uint32_t nlgroup; | 15 | __u32 nlgroup; |
14 | unsigned int cprange; | 16 | unsigned int cprange; |
15 | unsigned int qthreshold; | 17 | unsigned int qthreshold; |
16 | char prefix[EBT_ULOG_PREFIX_LEN]; | 18 | char prefix[EBT_ULOG_PREFIX_LEN]; |
diff --git a/include/linux/netfilter_bridge/ebt_vlan.h b/include/linux/netfilter_bridge/ebt_vlan.h index 1d98be4031e7..967d1d5cf98d 100644 --- a/include/linux/netfilter_bridge/ebt_vlan.h +++ b/include/linux/netfilter_bridge/ebt_vlan.h | |||
@@ -1,6 +1,8 @@ | |||
1 | #ifndef __LINUX_BRIDGE_EBT_VLAN_H | 1 | #ifndef __LINUX_BRIDGE_EBT_VLAN_H |
2 | #define __LINUX_BRIDGE_EBT_VLAN_H | 2 | #define __LINUX_BRIDGE_EBT_VLAN_H |
3 | 3 | ||
4 | #include <linux/types.h> | ||
5 | |||
4 | #define EBT_VLAN_ID 0x01 | 6 | #define EBT_VLAN_ID 0x01 |
5 | #define EBT_VLAN_PRIO 0x02 | 7 | #define EBT_VLAN_PRIO 0x02 |
6 | #define EBT_VLAN_ENCAP 0x04 | 8 | #define EBT_VLAN_ENCAP 0x04 |
@@ -8,12 +10,12 @@ | |||
8 | #define EBT_VLAN_MATCH "vlan" | 10 | #define EBT_VLAN_MATCH "vlan" |
9 | 11 | ||
10 | struct ebt_vlan_info { | 12 | struct ebt_vlan_info { |
11 | uint16_t id; /* VLAN ID {1-4095} */ | 13 | __u16 id; /* VLAN ID {1-4095} */ |
12 | uint8_t prio; /* VLAN User Priority {0-7} */ | 14 | __u8 prio; /* VLAN User Priority {0-7} */ |
13 | __be16 encap; /* VLAN Encapsulated frame code {0-65535} */ | 15 | __be16 encap; /* VLAN Encapsulated frame code {0-65535} */ |
14 | uint8_t bitmask; /* Args bitmask bit 1=1 - ID arg, | 16 | __u8 bitmask; /* Args bitmask bit 1=1 - ID arg, |
15 | bit 2=1 User-Priority arg, bit 3=1 encap*/ | 17 | bit 2=1 User-Priority arg, bit 3=1 encap*/ |
16 | uint8_t invflags; /* Inverse bitmask bit 1=1 - inversed ID arg, | 18 | __u8 invflags; /* Inverse bitmask bit 1=1 - inversed ID arg, |
17 | bit 2=1 - inversed Pirority arg */ | 19 | bit 2=1 - inversed Pirority arg */ |
18 | }; | 20 | }; |
19 | 21 | ||
diff --git a/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h b/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h index e5a3687c8a72..c6a204c97047 100644 --- a/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h +++ b/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h | |||
@@ -1,6 +1,8 @@ | |||
1 | #ifndef _IPT_CLUSTERIP_H_target | 1 | #ifndef _IPT_CLUSTERIP_H_target |
2 | #define _IPT_CLUSTERIP_H_target | 2 | #define _IPT_CLUSTERIP_H_target |
3 | 3 | ||
4 | #include <linux/types.h> | ||
5 | |||
4 | enum clusterip_hashmode { | 6 | enum clusterip_hashmode { |
5 | CLUSTERIP_HASHMODE_SIP = 0, | 7 | CLUSTERIP_HASHMODE_SIP = 0, |
6 | CLUSTERIP_HASHMODE_SIP_SPT, | 8 | CLUSTERIP_HASHMODE_SIP_SPT, |
@@ -17,15 +19,15 @@ struct clusterip_config; | |||
17 | 19 | ||
18 | struct ipt_clusterip_tgt_info { | 20 | struct ipt_clusterip_tgt_info { |
19 | 21 | ||
20 | u_int32_t flags; | 22 | __u32 flags; |
21 | 23 | ||
22 | /* only relevant for new ones */ | 24 | /* only relevant for new ones */ |
23 | u_int8_t clustermac[6]; | 25 | __u8 clustermac[6]; |
24 | u_int16_t num_total_nodes; | 26 | __u16 num_total_nodes; |
25 | u_int16_t num_local_nodes; | 27 | __u16 num_local_nodes; |
26 | u_int16_t local_nodes[CLUSTERIP_MAX_NODES]; | 28 | __u16 local_nodes[CLUSTERIP_MAX_NODES]; |
27 | u_int32_t hash_mode; | 29 | __u32 hash_mode; |
28 | u_int32_t hash_initval; | 30 | __u32 hash_initval; |
29 | 31 | ||
30 | /* Used internally by the kernel */ | 32 | /* Used internally by the kernel */ |
31 | struct clusterip_config *config; | 33 | struct clusterip_config *config; |
diff --git a/include/linux/netfilter_ipv4/ipt_ECN.h b/include/linux/netfilter_ipv4/ipt_ECN.h index 7ca45918ab8e..bb88d5315a4d 100644 --- a/include/linux/netfilter_ipv4/ipt_ECN.h +++ b/include/linux/netfilter_ipv4/ipt_ECN.h | |||
@@ -8,6 +8,8 @@ | |||
8 | */ | 8 | */ |
9 | #ifndef _IPT_ECN_TARGET_H | 9 | #ifndef _IPT_ECN_TARGET_H |
10 | #define _IPT_ECN_TARGET_H | 10 | #define _IPT_ECN_TARGET_H |
11 | |||
12 | #include <linux/types.h> | ||
11 | #include <linux/netfilter/xt_DSCP.h> | 13 | #include <linux/netfilter/xt_DSCP.h> |
12 | 14 | ||
13 | #define IPT_ECN_IP_MASK (~XT_DSCP_MASK) | 15 | #define IPT_ECN_IP_MASK (~XT_DSCP_MASK) |
@@ -19,11 +21,11 @@ | |||
19 | #define IPT_ECN_OP_MASK 0xce | 21 | #define IPT_ECN_OP_MASK 0xce |
20 | 22 | ||
21 | struct ipt_ECN_info { | 23 | struct ipt_ECN_info { |
22 | u_int8_t operation; /* bitset of operations */ | 24 | __u8 operation; /* bitset of operations */ |
23 | u_int8_t ip_ect; /* ECT codepoint of IPv4 header, pre-shifted */ | 25 | __u8 ip_ect; /* ECT codepoint of IPv4 header, pre-shifted */ |
24 | union { | 26 | union { |
25 | struct { | 27 | struct { |
26 | u_int8_t ece:1, cwr:1; /* TCP ECT bits */ | 28 | __u8 ece:1, cwr:1; /* TCP ECT bits */ |
27 | } tcp; | 29 | } tcp; |
28 | } proto; | 30 | } proto; |
29 | }; | 31 | }; |
diff --git a/include/linux/netfilter_ipv4/ipt_SAME.h b/include/linux/netfilter_ipv4/ipt_SAME.h index 2529660c5b38..5bca78267afd 100644 --- a/include/linux/netfilter_ipv4/ipt_SAME.h +++ b/include/linux/netfilter_ipv4/ipt_SAME.h | |||
@@ -1,15 +1,17 @@ | |||
1 | #ifndef _IPT_SAME_H | 1 | #ifndef _IPT_SAME_H |
2 | #define _IPT_SAME_H | 2 | #define _IPT_SAME_H |
3 | 3 | ||
4 | #include <linux/types.h> | ||
5 | |||
4 | #define IPT_SAME_MAX_RANGE 10 | 6 | #define IPT_SAME_MAX_RANGE 10 |
5 | 7 | ||
6 | #define IPT_SAME_NODST 0x01 | 8 | #define IPT_SAME_NODST 0x01 |
7 | 9 | ||
8 | struct ipt_same_info { | 10 | struct ipt_same_info { |
9 | unsigned char info; | 11 | unsigned char info; |
10 | u_int32_t rangesize; | 12 | __u32 rangesize; |
11 | u_int32_t ipnum; | 13 | __u32 ipnum; |
12 | u_int32_t *iparray; | 14 | __u32 *iparray; |
13 | 15 | ||
14 | /* hangs off end. */ | 16 | /* hangs off end. */ |
15 | struct nf_nat_range range[IPT_SAME_MAX_RANGE]; | 17 | struct nf_nat_range range[IPT_SAME_MAX_RANGE]; |
diff --git a/include/linux/netfilter_ipv4/ipt_TTL.h b/include/linux/netfilter_ipv4/ipt_TTL.h index ee6611edc112..f6ac169d92f9 100644 --- a/include/linux/netfilter_ipv4/ipt_TTL.h +++ b/include/linux/netfilter_ipv4/ipt_TTL.h | |||
@@ -4,6 +4,8 @@ | |||
4 | #ifndef _IPT_TTL_H | 4 | #ifndef _IPT_TTL_H |
5 | #define _IPT_TTL_H | 5 | #define _IPT_TTL_H |
6 | 6 | ||
7 | #include <linux/types.h> | ||
8 | |||
7 | enum { | 9 | enum { |
8 | IPT_TTL_SET = 0, | 10 | IPT_TTL_SET = 0, |
9 | IPT_TTL_INC, | 11 | IPT_TTL_INC, |
@@ -13,8 +15,8 @@ enum { | |||
13 | #define IPT_TTL_MAXMODE IPT_TTL_DEC | 15 | #define IPT_TTL_MAXMODE IPT_TTL_DEC |
14 | 16 | ||
15 | struct ipt_TTL_info { | 17 | struct ipt_TTL_info { |
16 | u_int8_t mode; | 18 | __u8 mode; |
17 | u_int8_t ttl; | 19 | __u8 ttl; |
18 | }; | 20 | }; |
19 | 21 | ||
20 | 22 | ||
diff --git a/include/linux/netfilter_ipv4/ipt_addrtype.h b/include/linux/netfilter_ipv4/ipt_addrtype.h index 446de6aef983..0da42237c8da 100644 --- a/include/linux/netfilter_ipv4/ipt_addrtype.h +++ b/include/linux/netfilter_ipv4/ipt_addrtype.h | |||
@@ -1,6 +1,8 @@ | |||
1 | #ifndef _IPT_ADDRTYPE_H | 1 | #ifndef _IPT_ADDRTYPE_H |
2 | #define _IPT_ADDRTYPE_H | 2 | #define _IPT_ADDRTYPE_H |
3 | 3 | ||
4 | #include <linux/types.h> | ||
5 | |||
4 | enum { | 6 | enum { |
5 | IPT_ADDRTYPE_INVERT_SOURCE = 0x0001, | 7 | IPT_ADDRTYPE_INVERT_SOURCE = 0x0001, |
6 | IPT_ADDRTYPE_INVERT_DEST = 0x0002, | 8 | IPT_ADDRTYPE_INVERT_DEST = 0x0002, |
@@ -9,17 +11,17 @@ enum { | |||
9 | }; | 11 | }; |
10 | 12 | ||
11 | struct ipt_addrtype_info_v1 { | 13 | struct ipt_addrtype_info_v1 { |
12 | u_int16_t source; /* source-type mask */ | 14 | __u16 source; /* source-type mask */ |
13 | u_int16_t dest; /* dest-type mask */ | 15 | __u16 dest; /* dest-type mask */ |
14 | u_int32_t flags; | 16 | __u32 flags; |
15 | }; | 17 | }; |
16 | 18 | ||
17 | /* revision 0 */ | 19 | /* revision 0 */ |
18 | struct ipt_addrtype_info { | 20 | struct ipt_addrtype_info { |
19 | u_int16_t source; /* source-type mask */ | 21 | __u16 source; /* source-type mask */ |
20 | u_int16_t dest; /* dest-type mask */ | 22 | __u16 dest; /* dest-type mask */ |
21 | u_int32_t invert_source; | 23 | __u32 invert_source; |
22 | u_int32_t invert_dest; | 24 | __u32 invert_dest; |
23 | }; | 25 | }; |
24 | 26 | ||
25 | #endif | 27 | #endif |
diff --git a/include/linux/netfilter_ipv4/ipt_ah.h b/include/linux/netfilter_ipv4/ipt_ah.h index 2e555b4d05e3..4e02bb0119e3 100644 --- a/include/linux/netfilter_ipv4/ipt_ah.h +++ b/include/linux/netfilter_ipv4/ipt_ah.h | |||
@@ -1,9 +1,11 @@ | |||
1 | #ifndef _IPT_AH_H | 1 | #ifndef _IPT_AH_H |
2 | #define _IPT_AH_H | 2 | #define _IPT_AH_H |
3 | 3 | ||
4 | #include <linux/types.h> | ||
5 | |||
4 | struct ipt_ah { | 6 | struct ipt_ah { |
5 | u_int32_t spis[2]; /* Security Parameter Index */ | 7 | __u32 spis[2]; /* Security Parameter Index */ |
6 | u_int8_t invflags; /* Inverse flags */ | 8 | __u8 invflags; /* Inverse flags */ |
7 | }; | 9 | }; |
8 | 10 | ||
9 | 11 | ||
diff --git a/include/linux/netfilter_ipv4/ipt_ecn.h b/include/linux/netfilter_ipv4/ipt_ecn.h index 9945baa4ccd7..eabf95fb7d3e 100644 --- a/include/linux/netfilter_ipv4/ipt_ecn.h +++ b/include/linux/netfilter_ipv4/ipt_ecn.h | |||
@@ -8,6 +8,8 @@ | |||
8 | */ | 8 | */ |
9 | #ifndef _IPT_ECN_H | 9 | #ifndef _IPT_ECN_H |
10 | #define _IPT_ECN_H | 10 | #define _IPT_ECN_H |
11 | |||
12 | #include <linux/types.h> | ||
11 | #include <linux/netfilter/xt_dscp.h> | 13 | #include <linux/netfilter/xt_dscp.h> |
12 | 14 | ||
13 | #define IPT_ECN_IP_MASK (~XT_DSCP_MASK) | 15 | #define IPT_ECN_IP_MASK (~XT_DSCP_MASK) |
@@ -20,12 +22,12 @@ | |||
20 | 22 | ||
21 | /* match info */ | 23 | /* match info */ |
22 | struct ipt_ecn_info { | 24 | struct ipt_ecn_info { |
23 | u_int8_t operation; | 25 | __u8 operation; |
24 | u_int8_t invert; | 26 | __u8 invert; |
25 | u_int8_t ip_ect; | 27 | __u8 ip_ect; |
26 | union { | 28 | union { |
27 | struct { | 29 | struct { |
28 | u_int8_t ect; | 30 | __u8 ect; |
29 | } tcp; | 31 | } tcp; |
30 | } proto; | 32 | } proto; |
31 | }; | 33 | }; |
diff --git a/include/linux/netfilter_ipv4/ipt_ttl.h b/include/linux/netfilter_ipv4/ipt_ttl.h index ee24fd86a3aa..37bee4442486 100644 --- a/include/linux/netfilter_ipv4/ipt_ttl.h +++ b/include/linux/netfilter_ipv4/ipt_ttl.h | |||
@@ -4,6 +4,8 @@ | |||
4 | #ifndef _IPT_TTL_H | 4 | #ifndef _IPT_TTL_H |
5 | #define _IPT_TTL_H | 5 | #define _IPT_TTL_H |
6 | 6 | ||
7 | #include <linux/types.h> | ||
8 | |||
7 | enum { | 9 | enum { |
8 | IPT_TTL_EQ = 0, /* equals */ | 10 | IPT_TTL_EQ = 0, /* equals */ |
9 | IPT_TTL_NE, /* not equals */ | 11 | IPT_TTL_NE, /* not equals */ |
@@ -13,8 +15,8 @@ enum { | |||
13 | 15 | ||
14 | 16 | ||
15 | struct ipt_ttl_info { | 17 | struct ipt_ttl_info { |
16 | u_int8_t mode; | 18 | __u8 mode; |
17 | u_int8_t ttl; | 19 | __u8 ttl; |
18 | }; | 20 | }; |
19 | 21 | ||
20 | 22 | ||
diff --git a/include/linux/netfilter_ipv6/ip6t_HL.h b/include/linux/netfilter_ipv6/ip6t_HL.h index afb7813d45ab..ebd8ead1bb63 100644 --- a/include/linux/netfilter_ipv6/ip6t_HL.h +++ b/include/linux/netfilter_ipv6/ip6t_HL.h | |||
@@ -5,6 +5,8 @@ | |||
5 | #ifndef _IP6T_HL_H | 5 | #ifndef _IP6T_HL_H |
6 | #define _IP6T_HL_H | 6 | #define _IP6T_HL_H |
7 | 7 | ||
8 | #include <linux/types.h> | ||
9 | |||
8 | enum { | 10 | enum { |
9 | IP6T_HL_SET = 0, | 11 | IP6T_HL_SET = 0, |
10 | IP6T_HL_INC, | 12 | IP6T_HL_INC, |
@@ -14,8 +16,8 @@ enum { | |||
14 | #define IP6T_HL_MAXMODE IP6T_HL_DEC | 16 | #define IP6T_HL_MAXMODE IP6T_HL_DEC |
15 | 17 | ||
16 | struct ip6t_HL_info { | 18 | struct ip6t_HL_info { |
17 | u_int8_t mode; | 19 | __u8 mode; |
18 | u_int8_t hop_limit; | 20 | __u8 hop_limit; |
19 | }; | 21 | }; |
20 | 22 | ||
21 | 23 | ||
diff --git a/include/linux/netfilter_ipv6/ip6t_REJECT.h b/include/linux/netfilter_ipv6/ip6t_REJECT.h index 6be6504162bb..205ed62e4605 100644 --- a/include/linux/netfilter_ipv6/ip6t_REJECT.h +++ b/include/linux/netfilter_ipv6/ip6t_REJECT.h | |||
@@ -1,6 +1,8 @@ | |||
1 | #ifndef _IP6T_REJECT_H | 1 | #ifndef _IP6T_REJECT_H |
2 | #define _IP6T_REJECT_H | 2 | #define _IP6T_REJECT_H |
3 | 3 | ||
4 | #include <linux/types.h> | ||
5 | |||
4 | enum ip6t_reject_with { | 6 | enum ip6t_reject_with { |
5 | IP6T_ICMP6_NO_ROUTE, | 7 | IP6T_ICMP6_NO_ROUTE, |
6 | IP6T_ICMP6_ADM_PROHIBITED, | 8 | IP6T_ICMP6_ADM_PROHIBITED, |
@@ -12,7 +14,7 @@ enum ip6t_reject_with { | |||
12 | }; | 14 | }; |
13 | 15 | ||
14 | struct ip6t_reject_info { | 16 | struct ip6t_reject_info { |
15 | u_int32_t with; /* reject type */ | 17 | __u32 with; /* reject type */ |
16 | }; | 18 | }; |
17 | 19 | ||
18 | #endif /*_IP6T_REJECT_H*/ | 20 | #endif /*_IP6T_REJECT_H*/ |
diff --git a/include/linux/netfilter_ipv6/ip6t_ah.h b/include/linux/netfilter_ipv6/ip6t_ah.h index 17a745cfb2c7..5da2b65cb3ad 100644 --- a/include/linux/netfilter_ipv6/ip6t_ah.h +++ b/include/linux/netfilter_ipv6/ip6t_ah.h | |||
@@ -1,11 +1,13 @@ | |||
1 | #ifndef _IP6T_AH_H | 1 | #ifndef _IP6T_AH_H |
2 | #define _IP6T_AH_H | 2 | #define _IP6T_AH_H |
3 | 3 | ||
4 | #include <linux/types.h> | ||
5 | |||
4 | struct ip6t_ah { | 6 | struct ip6t_ah { |
5 | u_int32_t spis[2]; /* Security Parameter Index */ | 7 | __u32 spis[2]; /* Security Parameter Index */ |
6 | u_int32_t hdrlen; /* Header Length */ | 8 | __u32 hdrlen; /* Header Length */ |
7 | u_int8_t hdrres; /* Test of the Reserved Filed */ | 9 | __u8 hdrres; /* Test of the Reserved Filed */ |
8 | u_int8_t invflags; /* Inverse flags */ | 10 | __u8 invflags; /* Inverse flags */ |
9 | }; | 11 | }; |
10 | 12 | ||
11 | #define IP6T_AH_SPI 0x01 | 13 | #define IP6T_AH_SPI 0x01 |
diff --git a/include/linux/netfilter_ipv6/ip6t_frag.h b/include/linux/netfilter_ipv6/ip6t_frag.h index 3724d0850920..b47f61b9e082 100644 --- a/include/linux/netfilter_ipv6/ip6t_frag.h +++ b/include/linux/netfilter_ipv6/ip6t_frag.h | |||
@@ -1,11 +1,13 @@ | |||
1 | #ifndef _IP6T_FRAG_H | 1 | #ifndef _IP6T_FRAG_H |
2 | #define _IP6T_FRAG_H | 2 | #define _IP6T_FRAG_H |
3 | 3 | ||
4 | #include <linux/types.h> | ||
5 | |||
4 | struct ip6t_frag { | 6 | struct ip6t_frag { |
5 | u_int32_t ids[2]; /* Security Parameter Index */ | 7 | __u32 ids[2]; /* Security Parameter Index */ |
6 | u_int32_t hdrlen; /* Header Length */ | 8 | __u32 hdrlen; /* Header Length */ |
7 | u_int8_t flags; /* */ | 9 | __u8 flags; /* */ |
8 | u_int8_t invflags; /* Inverse flags */ | 10 | __u8 invflags; /* Inverse flags */ |
9 | }; | 11 | }; |
10 | 12 | ||
11 | #define IP6T_FRAG_IDS 0x01 | 13 | #define IP6T_FRAG_IDS 0x01 |
diff --git a/include/linux/netfilter_ipv6/ip6t_hl.h b/include/linux/netfilter_ipv6/ip6t_hl.h index 5ef91b8319a8..6e76dbc6c19a 100644 --- a/include/linux/netfilter_ipv6/ip6t_hl.h +++ b/include/linux/netfilter_ipv6/ip6t_hl.h | |||
@@ -5,6 +5,8 @@ | |||
5 | #ifndef _IP6T_HL_H | 5 | #ifndef _IP6T_HL_H |
6 | #define _IP6T_HL_H | 6 | #define _IP6T_HL_H |
7 | 7 | ||
8 | #include <linux/types.h> | ||
9 | |||
8 | enum { | 10 | enum { |
9 | IP6T_HL_EQ = 0, /* equals */ | 11 | IP6T_HL_EQ = 0, /* equals */ |
10 | IP6T_HL_NE, /* not equals */ | 12 | IP6T_HL_NE, /* not equals */ |
@@ -14,8 +16,8 @@ enum { | |||
14 | 16 | ||
15 | 17 | ||
16 | struct ip6t_hl_info { | 18 | struct ip6t_hl_info { |
17 | u_int8_t mode; | 19 | __u8 mode; |
18 | u_int8_t hop_limit; | 20 | __u8 hop_limit; |
19 | }; | 21 | }; |
20 | 22 | ||
21 | 23 | ||
diff --git a/include/linux/netfilter_ipv6/ip6t_ipv6header.h b/include/linux/netfilter_ipv6/ip6t_ipv6header.h index 01dfd445596a..efae3a20c214 100644 --- a/include/linux/netfilter_ipv6/ip6t_ipv6header.h +++ b/include/linux/netfilter_ipv6/ip6t_ipv6header.h | |||
@@ -8,10 +8,12 @@ on whether they contain certain headers */ | |||
8 | #ifndef __IPV6HEADER_H | 8 | #ifndef __IPV6HEADER_H |
9 | #define __IPV6HEADER_H | 9 | #define __IPV6HEADER_H |
10 | 10 | ||
11 | #include <linux/types.h> | ||
12 | |||
11 | struct ip6t_ipv6header_info { | 13 | struct ip6t_ipv6header_info { |
12 | u_int8_t matchflags; | 14 | __u8 matchflags; |
13 | u_int8_t invflags; | 15 | __u8 invflags; |
14 | u_int8_t modeflag; | 16 | __u8 modeflag; |
15 | }; | 17 | }; |
16 | 18 | ||
17 | #define MASK_HOPOPTS 128 | 19 | #define MASK_HOPOPTS 128 |
diff --git a/include/linux/netfilter_ipv6/ip6t_mh.h b/include/linux/netfilter_ipv6/ip6t_mh.h index 18549bca2d1f..a7729a5025cd 100644 --- a/include/linux/netfilter_ipv6/ip6t_mh.h +++ b/include/linux/netfilter_ipv6/ip6t_mh.h | |||
@@ -1,10 +1,12 @@ | |||
1 | #ifndef _IP6T_MH_H | 1 | #ifndef _IP6T_MH_H |
2 | #define _IP6T_MH_H | 2 | #define _IP6T_MH_H |
3 | 3 | ||
4 | #include <linux/types.h> | ||
5 | |||
4 | /* MH matching stuff */ | 6 | /* MH matching stuff */ |
5 | struct ip6t_mh { | 7 | struct ip6t_mh { |
6 | u_int8_t types[2]; /* MH type range */ | 8 | __u8 types[2]; /* MH type range */ |
7 | u_int8_t invflags; /* Inverse flags */ | 9 | __u8 invflags; /* Inverse flags */ |
8 | }; | 10 | }; |
9 | 11 | ||
10 | /* Values for "invflags" field in struct ip6t_mh. */ | 12 | /* Values for "invflags" field in struct ip6t_mh. */ |
diff --git a/include/linux/netfilter_ipv6/ip6t_opts.h b/include/linux/netfilter_ipv6/ip6t_opts.h index 62d89bcd9f9c..17d419a811fd 100644 --- a/include/linux/netfilter_ipv6/ip6t_opts.h +++ b/include/linux/netfilter_ipv6/ip6t_opts.h | |||
@@ -1,14 +1,16 @@ | |||
1 | #ifndef _IP6T_OPTS_H | 1 | #ifndef _IP6T_OPTS_H |
2 | #define _IP6T_OPTS_H | 2 | #define _IP6T_OPTS_H |
3 | 3 | ||
4 | #include <linux/types.h> | ||
5 | |||
4 | #define IP6T_OPTS_OPTSNR 16 | 6 | #define IP6T_OPTS_OPTSNR 16 |
5 | 7 | ||
6 | struct ip6t_opts { | 8 | struct ip6t_opts { |
7 | u_int32_t hdrlen; /* Header Length */ | 9 | __u32 hdrlen; /* Header Length */ |
8 | u_int8_t flags; /* */ | 10 | __u8 flags; /* */ |
9 | u_int8_t invflags; /* Inverse flags */ | 11 | __u8 invflags; /* Inverse flags */ |
10 | u_int16_t opts[IP6T_OPTS_OPTSNR]; /* opts */ | 12 | __u16 opts[IP6T_OPTS_OPTSNR]; /* opts */ |
11 | u_int8_t optsnr; /* Nr of OPts */ | 13 | __u8 optsnr; /* Nr of OPts */ |
12 | }; | 14 | }; |
13 | 15 | ||
14 | #define IP6T_OPTS_LEN 0x01 | 16 | #define IP6T_OPTS_LEN 0x01 |
diff --git a/include/linux/netfilter_ipv6/ip6t_rt.h b/include/linux/netfilter_ipv6/ip6t_rt.h index ab91bfd2cd00..7605a5ff81cd 100644 --- a/include/linux/netfilter_ipv6/ip6t_rt.h +++ b/include/linux/netfilter_ipv6/ip6t_rt.h | |||
@@ -1,18 +1,19 @@ | |||
1 | #ifndef _IP6T_RT_H | 1 | #ifndef _IP6T_RT_H |
2 | #define _IP6T_RT_H | 2 | #define _IP6T_RT_H |
3 | 3 | ||
4 | #include <linux/types.h> | ||
4 | /*#include <linux/in6.h>*/ | 5 | /*#include <linux/in6.h>*/ |
5 | 6 | ||
6 | #define IP6T_RT_HOPS 16 | 7 | #define IP6T_RT_HOPS 16 |
7 | 8 | ||
8 | struct ip6t_rt { | 9 | struct ip6t_rt { |
9 | u_int32_t rt_type; /* Routing Type */ | 10 | __u32 rt_type; /* Routing Type */ |
10 | u_int32_t segsleft[2]; /* Segments Left */ | 11 | __u32 segsleft[2]; /* Segments Left */ |
11 | u_int32_t hdrlen; /* Header Length */ | 12 | __u32 hdrlen; /* Header Length */ |
12 | u_int8_t flags; /* */ | 13 | __u8 flags; /* */ |
13 | u_int8_t invflags; /* Inverse flags */ | 14 | __u8 invflags; /* Inverse flags */ |
14 | struct in6_addr addrs[IP6T_RT_HOPS]; /* Hops */ | 15 | struct in6_addr addrs[IP6T_RT_HOPS]; /* Hops */ |
15 | u_int8_t addrnr; /* Nr of Addresses */ | 16 | __u8 addrnr; /* Nr of Addresses */ |
16 | }; | 17 | }; |
17 | 18 | ||
18 | #define IP6T_RT_TYP 0x01 | 19 | #define IP6T_RT_TYP 0x01 |
diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index 2cfa4bc8dea6..776cd93d5f7b 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h | |||
@@ -481,4 +481,16 @@ struct tc_drr_stats { | |||
481 | __u32 deficit; | 481 | __u32 deficit; |
482 | }; | 482 | }; |
483 | 483 | ||
484 | /* MQPRIO */ | ||
485 | #define TC_QOPT_BITMASK 15 | ||
486 | #define TC_QOPT_MAX_QUEUE 16 | ||
487 | |||
488 | struct tc_mqprio_qopt { | ||
489 | __u8 num_tc; | ||
490 | __u8 prio_tc_map[TC_QOPT_BITMASK + 1]; | ||
491 | __u8 hw; | ||
492 | __u16 count[TC_QOPT_MAX_QUEUE]; | ||
493 | __u16 offset[TC_QOPT_MAX_QUEUE]; | ||
494 | }; | ||
495 | |||
484 | #endif | 496 | #endif |
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index bf221d65d9ad..6e946da9d1d6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h | |||
@@ -1801,6 +1801,15 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) | |||
1801 | prefetch(skb->prev), (skb != (struct sk_buff *)(queue)); \ | 1801 | prefetch(skb->prev), (skb != (struct sk_buff *)(queue)); \ |
1802 | skb = skb->prev) | 1802 | skb = skb->prev) |
1803 | 1803 | ||
1804 | #define skb_queue_reverse_walk_safe(queue, skb, tmp) \ | ||
1805 | for (skb = (queue)->prev, tmp = skb->prev; \ | ||
1806 | skb != (struct sk_buff *)(queue); \ | ||
1807 | skb = tmp, tmp = skb->prev) | ||
1808 | |||
1809 | #define skb_queue_reverse_walk_from_safe(queue, skb, tmp) \ | ||
1810 | for (tmp = skb->prev; \ | ||
1811 | skb != (struct sk_buff *)(queue); \ | ||
1812 | skb = tmp, tmp = skb->prev) | ||
1804 | 1813 | ||
1805 | static inline bool skb_has_frag_list(const struct sk_buff *skb) | 1814 | static inline bool skb_has_frag_list(const struct sk_buff *skb) |
1806 | { | 1815 | { |
diff --git a/include/net/dst.h b/include/net/dst.h index 93b0310317be..be5a0d4c491d 100644 --- a/include/net/dst.h +++ b/include/net/dst.h | |||
@@ -72,7 +72,7 @@ struct dst_entry { | |||
72 | 72 | ||
73 | u32 _metrics[RTAX_MAX]; | 73 | u32 _metrics[RTAX_MAX]; |
74 | 74 | ||
75 | #ifdef CONFIG_NET_CLS_ROUTE | 75 | #ifdef CONFIG_IP_ROUTE_CLASSID |
76 | __u32 tclassid; | 76 | __u32 tclassid; |
77 | #else | 77 | #else |
78 | __u32 __pad2; | 78 | __u32 __pad2; |
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 07bdb5e9e8ac..65d1fcdbc63b 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h | |||
@@ -55,7 +55,7 @@ struct fib_nh { | |||
55 | int nh_weight; | 55 | int nh_weight; |
56 | int nh_power; | 56 | int nh_power; |
57 | #endif | 57 | #endif |
58 | #ifdef CONFIG_NET_CLS_ROUTE | 58 | #ifdef CONFIG_IP_ROUTE_CLASSID |
59 | __u32 nh_tclassid; | 59 | __u32 nh_tclassid; |
60 | #endif | 60 | #endif |
61 | int nh_oif; | 61 | int nh_oif; |
@@ -201,7 +201,7 @@ static inline int fib_lookup(struct net *net, const struct flowi *flp, | |||
201 | extern int __net_init fib4_rules_init(struct net *net); | 201 | extern int __net_init fib4_rules_init(struct net *net); |
202 | extern void __net_exit fib4_rules_exit(struct net *net); | 202 | extern void __net_exit fib4_rules_exit(struct net *net); |
203 | 203 | ||
204 | #ifdef CONFIG_NET_CLS_ROUTE | 204 | #ifdef CONFIG_IP_ROUTE_CLASSID |
205 | extern u32 fib_rules_tclass(struct fib_result *res); | 205 | extern u32 fib_rules_tclass(struct fib_result *res); |
206 | #endif | 206 | #endif |
207 | 207 | ||
@@ -235,7 +235,7 @@ extern struct fib_table *fib_hash_table(u32 id); | |||
235 | 235 | ||
236 | static inline void fib_combine_itag(u32 *itag, struct fib_result *res) | 236 | static inline void fib_combine_itag(u32 *itag, struct fib_result *res) |
237 | { | 237 | { |
238 | #ifdef CONFIG_NET_CLS_ROUTE | 238 | #ifdef CONFIG_IP_ROUTE_CLASSID |
239 | #ifdef CONFIG_IP_MULTIPLE_TABLES | 239 | #ifdef CONFIG_IP_MULTIPLE_TABLES |
240 | u32 rtag; | 240 | u32 rtag; |
241 | #endif | 241 | #endif |
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index b7bbd6c28cfa..b23bea62f708 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h | |||
@@ -28,6 +28,80 @@ | |||
28 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | 28 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) |
29 | #include <net/netfilter/nf_conntrack.h> | 29 | #include <net/netfilter/nf_conntrack.h> |
30 | #endif | 30 | #endif |
31 | #include <net/net_namespace.h> /* Netw namespace */ | ||
32 | |||
33 | /* | ||
34 | * Generic access of ipvs struct | ||
35 | */ | ||
36 | static inline struct netns_ipvs *net_ipvs(struct net* net) | ||
37 | { | ||
38 | return net->ipvs; | ||
39 | } | ||
40 | /* | ||
41 | * Get net ptr from skb in traffic cases | ||
42 | * use skb_sknet when call is from userland (ioctl or netlink) | ||
43 | */ | ||
44 | static inline struct net *skb_net(const struct sk_buff *skb) | ||
45 | { | ||
46 | #ifdef CONFIG_NET_NS | ||
47 | #ifdef CONFIG_IP_VS_DEBUG | ||
48 | /* | ||
49 | * This is used for debug only. | ||
50 | * Start with the most likely hit | ||
51 | * End with BUG | ||
52 | */ | ||
53 | if (likely(skb->dev && skb->dev->nd_net)) | ||
54 | return dev_net(skb->dev); | ||
55 | if (skb_dst(skb)->dev) | ||
56 | return dev_net(skb_dst(skb)->dev); | ||
57 | WARN(skb->sk, "Maybe skb_sknet should be used in %s() at line:%d\n", | ||
58 | __func__, __LINE__); | ||
59 | if (likely(skb->sk && skb->sk->sk_net)) | ||
60 | return sock_net(skb->sk); | ||
61 | pr_err("There is no net ptr to find in the skb in %s() line:%d\n", | ||
62 | __func__, __LINE__); | ||
63 | BUG(); | ||
64 | #else | ||
65 | return dev_net(skb->dev ? : skb_dst(skb)->dev); | ||
66 | #endif | ||
67 | #else | ||
68 | return &init_net; | ||
69 | #endif | ||
70 | } | ||
71 | |||
72 | static inline struct net *skb_sknet(const struct sk_buff *skb) | ||
73 | { | ||
74 | #ifdef CONFIG_NET_NS | ||
75 | #ifdef CONFIG_IP_VS_DEBUG | ||
76 | /* Start with the most likely hit */ | ||
77 | if (likely(skb->sk && skb->sk->sk_net)) | ||
78 | return sock_net(skb->sk); | ||
79 | WARN(skb->dev, "Maybe skb_net should be used instead in %s() line:%d\n", | ||
80 | __func__, __LINE__); | ||
81 | if (likely(skb->dev && skb->dev->nd_net)) | ||
82 | return dev_net(skb->dev); | ||
83 | pr_err("There is no net ptr to find in the skb in %s() line:%d\n", | ||
84 | __func__, __LINE__); | ||
85 | BUG(); | ||
86 | #else | ||
87 | return sock_net(skb->sk); | ||
88 | #endif | ||
89 | #else | ||
90 | return &init_net; | ||
91 | #endif | ||
92 | } | ||
93 | /* | ||
94 | * This one needed for single_open_net since net is stored directly in | ||
95 | * private not as a struct i.e. seq_file_net cant be used. | ||
96 | */ | ||
97 | static inline struct net *seq_file_single_net(struct seq_file *seq) | ||
98 | { | ||
99 | #ifdef CONFIG_NET_NS | ||
100 | return (struct net *)seq->private; | ||
101 | #else | ||
102 | return &init_net; | ||
103 | #endif | ||
104 | } | ||
31 | 105 | ||
32 | /* Connections' size value needed by ip_vs_ctl.c */ | 106 | /* Connections' size value needed by ip_vs_ctl.c */ |
33 | extern int ip_vs_conn_tab_size; | 107 | extern int ip_vs_conn_tab_size; |
@@ -258,6 +332,23 @@ struct ip_vs_seq { | |||
258 | before last resized pkt */ | 332 | before last resized pkt */ |
259 | }; | 333 | }; |
260 | 334 | ||
335 | /* | ||
336 | * counters per cpu | ||
337 | */ | ||
338 | struct ip_vs_counters { | ||
339 | __u32 conns; /* connections scheduled */ | ||
340 | __u32 inpkts; /* incoming packets */ | ||
341 | __u32 outpkts; /* outgoing packets */ | ||
342 | __u64 inbytes; /* incoming bytes */ | ||
343 | __u64 outbytes; /* outgoing bytes */ | ||
344 | }; | ||
345 | /* | ||
346 | * Stats per cpu | ||
347 | */ | ||
348 | struct ip_vs_cpu_stats { | ||
349 | struct ip_vs_counters ustats; | ||
350 | struct u64_stats_sync syncp; | ||
351 | }; | ||
261 | 352 | ||
262 | /* | 353 | /* |
263 | * IPVS statistics objects | 354 | * IPVS statistics objects |
@@ -279,17 +370,34 @@ struct ip_vs_estimator { | |||
279 | }; | 370 | }; |
280 | 371 | ||
281 | struct ip_vs_stats { | 372 | struct ip_vs_stats { |
282 | struct ip_vs_stats_user ustats; /* statistics */ | 373 | struct ip_vs_stats_user ustats; /* statistics */ |
283 | struct ip_vs_estimator est; /* estimator */ | 374 | struct ip_vs_estimator est; /* estimator */ |
284 | 375 | struct ip_vs_cpu_stats *cpustats; /* per cpu counters */ | |
285 | spinlock_t lock; /* spin lock */ | 376 | spinlock_t lock; /* spin lock */ |
286 | }; | 377 | }; |
287 | 378 | ||
379 | /* | ||
380 | * Helper Macros for per cpu | ||
381 | * ipvs->tot_stats->ustats.count | ||
382 | */ | ||
383 | #define IPVS_STAT_INC(ipvs, count) \ | ||
384 | __this_cpu_inc((ipvs)->ustats->count) | ||
385 | |||
386 | #define IPVS_STAT_ADD(ipvs, count, value) \ | ||
387 | do {\ | ||
388 | write_seqcount_begin(per_cpu_ptr((ipvs)->ustats_seq, \ | ||
389 | raw_smp_processor_id())); \ | ||
390 | __this_cpu_add((ipvs)->ustats->count, value); \ | ||
391 | write_seqcount_end(per_cpu_ptr((ipvs)->ustats_seq, \ | ||
392 | raw_smp_processor_id())); \ | ||
393 | } while (0) | ||
394 | |||
288 | struct dst_entry; | 395 | struct dst_entry; |
289 | struct iphdr; | 396 | struct iphdr; |
290 | struct ip_vs_conn; | 397 | struct ip_vs_conn; |
291 | struct ip_vs_app; | 398 | struct ip_vs_app; |
292 | struct sk_buff; | 399 | struct sk_buff; |
400 | struct ip_vs_proto_data; | ||
293 | 401 | ||
294 | struct ip_vs_protocol { | 402 | struct ip_vs_protocol { |
295 | struct ip_vs_protocol *next; | 403 | struct ip_vs_protocol *next; |
@@ -297,21 +405,22 @@ struct ip_vs_protocol { | |||
297 | u16 protocol; | 405 | u16 protocol; |
298 | u16 num_states; | 406 | u16 num_states; |
299 | int dont_defrag; | 407 | int dont_defrag; |
300 | atomic_t appcnt; /* counter of proto app incs */ | ||
301 | int *timeout_table; /* protocol timeout table */ | ||
302 | 408 | ||
303 | void (*init)(struct ip_vs_protocol *pp); | 409 | void (*init)(struct ip_vs_protocol *pp); |
304 | 410 | ||
305 | void (*exit)(struct ip_vs_protocol *pp); | 411 | void (*exit)(struct ip_vs_protocol *pp); |
306 | 412 | ||
413 | void (*init_netns)(struct net *net, struct ip_vs_proto_data *pd); | ||
414 | |||
415 | void (*exit_netns)(struct net *net, struct ip_vs_proto_data *pd); | ||
416 | |||
307 | int (*conn_schedule)(int af, struct sk_buff *skb, | 417 | int (*conn_schedule)(int af, struct sk_buff *skb, |
308 | struct ip_vs_protocol *pp, | 418 | struct ip_vs_proto_data *pd, |
309 | int *verdict, struct ip_vs_conn **cpp); | 419 | int *verdict, struct ip_vs_conn **cpp); |
310 | 420 | ||
311 | struct ip_vs_conn * | 421 | struct ip_vs_conn * |
312 | (*conn_in_get)(int af, | 422 | (*conn_in_get)(int af, |
313 | const struct sk_buff *skb, | 423 | const struct sk_buff *skb, |
314 | struct ip_vs_protocol *pp, | ||
315 | const struct ip_vs_iphdr *iph, | 424 | const struct ip_vs_iphdr *iph, |
316 | unsigned int proto_off, | 425 | unsigned int proto_off, |
317 | int inverse); | 426 | int inverse); |
@@ -319,7 +428,6 @@ struct ip_vs_protocol { | |||
319 | struct ip_vs_conn * | 428 | struct ip_vs_conn * |
320 | (*conn_out_get)(int af, | 429 | (*conn_out_get)(int af, |
321 | const struct sk_buff *skb, | 430 | const struct sk_buff *skb, |
322 | struct ip_vs_protocol *pp, | ||
323 | const struct ip_vs_iphdr *iph, | 431 | const struct ip_vs_iphdr *iph, |
324 | unsigned int proto_off, | 432 | unsigned int proto_off, |
325 | int inverse); | 433 | int inverse); |
@@ -337,11 +445,11 @@ struct ip_vs_protocol { | |||
337 | 445 | ||
338 | int (*state_transition)(struct ip_vs_conn *cp, int direction, | 446 | int (*state_transition)(struct ip_vs_conn *cp, int direction, |
339 | const struct sk_buff *skb, | 447 | const struct sk_buff *skb, |
340 | struct ip_vs_protocol *pp); | 448 | struct ip_vs_proto_data *pd); |
341 | 449 | ||
342 | int (*register_app)(struct ip_vs_app *inc); | 450 | int (*register_app)(struct net *net, struct ip_vs_app *inc); |
343 | 451 | ||
344 | void (*unregister_app)(struct ip_vs_app *inc); | 452 | void (*unregister_app)(struct net *net, struct ip_vs_app *inc); |
345 | 453 | ||
346 | int (*app_conn_bind)(struct ip_vs_conn *cp); | 454 | int (*app_conn_bind)(struct ip_vs_conn *cp); |
347 | 455 | ||
@@ -350,14 +458,26 @@ struct ip_vs_protocol { | |||
350 | int offset, | 458 | int offset, |
351 | const char *msg); | 459 | const char *msg); |
352 | 460 | ||
353 | void (*timeout_change)(struct ip_vs_protocol *pp, int flags); | 461 | void (*timeout_change)(struct ip_vs_proto_data *pd, int flags); |
462 | }; | ||
354 | 463 | ||
355 | int (*set_state_timeout)(struct ip_vs_protocol *pp, char *sname, int to); | 464 | /* |
465 | * protocol data per netns | ||
466 | */ | ||
467 | struct ip_vs_proto_data { | ||
468 | struct ip_vs_proto_data *next; | ||
469 | struct ip_vs_protocol *pp; | ||
470 | int *timeout_table; /* protocol timeout table */ | ||
471 | atomic_t appcnt; /* counter of proto app incs. */ | ||
472 | struct tcp_states_t *tcp_state_table; | ||
356 | }; | 473 | }; |
357 | 474 | ||
358 | extern struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto); | 475 | extern struct ip_vs_protocol *ip_vs_proto_get(unsigned short proto); |
476 | extern struct ip_vs_proto_data *ip_vs_proto_data_get(struct net *net, | ||
477 | unsigned short proto); | ||
359 | 478 | ||
360 | struct ip_vs_conn_param { | 479 | struct ip_vs_conn_param { |
480 | struct net *net; | ||
361 | const union nf_inet_addr *caddr; | 481 | const union nf_inet_addr *caddr; |
362 | const union nf_inet_addr *vaddr; | 482 | const union nf_inet_addr *vaddr; |
363 | __be16 cport; | 483 | __be16 cport; |
@@ -375,16 +495,19 @@ struct ip_vs_conn_param { | |||
375 | */ | 495 | */ |
376 | struct ip_vs_conn { | 496 | struct ip_vs_conn { |
377 | struct list_head c_list; /* hashed list heads */ | 497 | struct list_head c_list; /* hashed list heads */ |
378 | 498 | #ifdef CONFIG_NET_NS | |
499 | struct net *net; /* Name space */ | ||
500 | #endif | ||
379 | /* Protocol, addresses and port numbers */ | 501 | /* Protocol, addresses and port numbers */ |
380 | u16 af; /* address family */ | 502 | u16 af; /* address family */ |
381 | union nf_inet_addr caddr; /* client address */ | 503 | __be16 cport; |
382 | union nf_inet_addr vaddr; /* virtual address */ | 504 | __be16 vport; |
383 | union nf_inet_addr daddr; /* destination address */ | 505 | __be16 dport; |
384 | volatile __u32 flags; /* status flags */ | 506 | __u32 fwmark; /* Fire wall mark from skb */ |
385 | __be16 cport; | 507 | union nf_inet_addr caddr; /* client address */ |
386 | __be16 vport; | 508 | union nf_inet_addr vaddr; /* virtual address */ |
387 | __be16 dport; | 509 | union nf_inet_addr daddr; /* destination address */ |
510 | volatile __u32 flags; /* status flags */ | ||
388 | __u16 protocol; /* Which protocol (TCP/UDP) */ | 511 | __u16 protocol; /* Which protocol (TCP/UDP) */ |
389 | 512 | ||
390 | /* counter and timer */ | 513 | /* counter and timer */ |
@@ -422,10 +545,38 @@ struct ip_vs_conn { | |||
422 | struct ip_vs_seq in_seq; /* incoming seq. struct */ | 545 | struct ip_vs_seq in_seq; /* incoming seq. struct */ |
423 | struct ip_vs_seq out_seq; /* outgoing seq. struct */ | 546 | struct ip_vs_seq out_seq; /* outgoing seq. struct */ |
424 | 547 | ||
548 | const struct ip_vs_pe *pe; | ||
425 | char *pe_data; | 549 | char *pe_data; |
426 | __u8 pe_data_len; | 550 | __u8 pe_data_len; |
427 | }; | 551 | }; |
428 | 552 | ||
553 | /* | ||
554 | * To save some memory in conn table when name space is disabled. | ||
555 | */ | ||
556 | static inline struct net *ip_vs_conn_net(const struct ip_vs_conn *cp) | ||
557 | { | ||
558 | #ifdef CONFIG_NET_NS | ||
559 | return cp->net; | ||
560 | #else | ||
561 | return &init_net; | ||
562 | #endif | ||
563 | } | ||
564 | static inline void ip_vs_conn_net_set(struct ip_vs_conn *cp, struct net *net) | ||
565 | { | ||
566 | #ifdef CONFIG_NET_NS | ||
567 | cp->net = net; | ||
568 | #endif | ||
569 | } | ||
570 | |||
571 | static inline int ip_vs_conn_net_eq(const struct ip_vs_conn *cp, | ||
572 | struct net *net) | ||
573 | { | ||
574 | #ifdef CONFIG_NET_NS | ||
575 | return cp->net == net; | ||
576 | #else | ||
577 | return 1; | ||
578 | #endif | ||
579 | } | ||
429 | 580 | ||
430 | /* | 581 | /* |
431 | * Extended internal versions of struct ip_vs_service_user and | 582 | * Extended internal versions of struct ip_vs_service_user and |
@@ -485,6 +636,7 @@ struct ip_vs_service { | |||
485 | unsigned flags; /* service status flags */ | 636 | unsigned flags; /* service status flags */ |
486 | unsigned timeout; /* persistent timeout in ticks */ | 637 | unsigned timeout; /* persistent timeout in ticks */ |
487 | __be32 netmask; /* grouping granularity */ | 638 | __be32 netmask; /* grouping granularity */ |
639 | struct net *net; | ||
488 | 640 | ||
489 | struct list_head destinations; /* real server d-linked list */ | 641 | struct list_head destinations; /* real server d-linked list */ |
490 | __u32 num_dests; /* number of servers */ | 642 | __u32 num_dests; /* number of servers */ |
@@ -510,8 +662,8 @@ struct ip_vs_dest { | |||
510 | struct list_head d_list; /* for table with all the dests */ | 662 | struct list_head d_list; /* for table with all the dests */ |
511 | 663 | ||
512 | u16 af; /* address family */ | 664 | u16 af; /* address family */ |
513 | union nf_inet_addr addr; /* IP address of the server */ | ||
514 | __be16 port; /* port number of the server */ | 665 | __be16 port; /* port number of the server */ |
666 | union nf_inet_addr addr; /* IP address of the server */ | ||
515 | volatile unsigned flags; /* dest status flags */ | 667 | volatile unsigned flags; /* dest status flags */ |
516 | atomic_t conn_flags; /* flags to copy to conn */ | 668 | atomic_t conn_flags; /* flags to copy to conn */ |
517 | atomic_t weight; /* server weight */ | 669 | atomic_t weight; /* server weight */ |
@@ -538,8 +690,8 @@ struct ip_vs_dest { | |||
538 | /* for virtual service */ | 690 | /* for virtual service */ |
539 | struct ip_vs_service *svc; /* service it belongs to */ | 691 | struct ip_vs_service *svc; /* service it belongs to */ |
540 | __u16 protocol; /* which protocol (TCP/UDP) */ | 692 | __u16 protocol; /* which protocol (TCP/UDP) */ |
541 | union nf_inet_addr vaddr; /* virtual IP address */ | ||
542 | __be16 vport; /* virtual port number */ | 693 | __be16 vport; /* virtual port number */ |
694 | union nf_inet_addr vaddr; /* virtual IP address */ | ||
543 | __u32 vfwmark; /* firewall mark of service */ | 695 | __u32 vfwmark; /* firewall mark of service */ |
544 | }; | 696 | }; |
545 | 697 | ||
@@ -674,13 +826,14 @@ enum { | |||
674 | IP_VS_DIR_LAST, | 826 | IP_VS_DIR_LAST, |
675 | }; | 827 | }; |
676 | 828 | ||
677 | static inline void ip_vs_conn_fill_param(int af, int protocol, | 829 | static inline void ip_vs_conn_fill_param(struct net *net, int af, int protocol, |
678 | const union nf_inet_addr *caddr, | 830 | const union nf_inet_addr *caddr, |
679 | __be16 cport, | 831 | __be16 cport, |
680 | const union nf_inet_addr *vaddr, | 832 | const union nf_inet_addr *vaddr, |
681 | __be16 vport, | 833 | __be16 vport, |
682 | struct ip_vs_conn_param *p) | 834 | struct ip_vs_conn_param *p) |
683 | { | 835 | { |
836 | p->net = net; | ||
684 | p->af = af; | 837 | p->af = af; |
685 | p->protocol = protocol; | 838 | p->protocol = protocol; |
686 | p->caddr = caddr; | 839 | p->caddr = caddr; |
@@ -695,7 +848,6 @@ struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p); | |||
695 | struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p); | 848 | struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p); |
696 | 849 | ||
697 | struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, | 850 | struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, |
698 | struct ip_vs_protocol *pp, | ||
699 | const struct ip_vs_iphdr *iph, | 851 | const struct ip_vs_iphdr *iph, |
700 | unsigned int proto_off, | 852 | unsigned int proto_off, |
701 | int inverse); | 853 | int inverse); |
@@ -703,7 +855,6 @@ struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, | |||
703 | struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p); | 855 | struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p); |
704 | 856 | ||
705 | struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, | 857 | struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, |
706 | struct ip_vs_protocol *pp, | ||
707 | const struct ip_vs_iphdr *iph, | 858 | const struct ip_vs_iphdr *iph, |
708 | unsigned int proto_off, | 859 | unsigned int proto_off, |
709 | int inverse); | 860 | int inverse); |
@@ -719,14 +870,14 @@ extern void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport); | |||
719 | struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p, | 870 | struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p, |
720 | const union nf_inet_addr *daddr, | 871 | const union nf_inet_addr *daddr, |
721 | __be16 dport, unsigned flags, | 872 | __be16 dport, unsigned flags, |
722 | struct ip_vs_dest *dest); | 873 | struct ip_vs_dest *dest, __u32 fwmark); |
723 | extern void ip_vs_conn_expire_now(struct ip_vs_conn *cp); | 874 | extern void ip_vs_conn_expire_now(struct ip_vs_conn *cp); |
724 | 875 | ||
725 | extern const char * ip_vs_state_name(__u16 proto, int state); | 876 | extern const char * ip_vs_state_name(__u16 proto, int state); |
726 | 877 | ||
727 | extern void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp); | 878 | extern void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp); |
728 | extern int ip_vs_check_template(struct ip_vs_conn *ct); | 879 | extern int ip_vs_check_template(struct ip_vs_conn *ct); |
729 | extern void ip_vs_random_dropentry(void); | 880 | extern void ip_vs_random_dropentry(struct net *net); |
730 | extern int ip_vs_conn_init(void); | 881 | extern int ip_vs_conn_init(void); |
731 | extern void ip_vs_conn_cleanup(void); | 882 | extern void ip_vs_conn_cleanup(void); |
732 | 883 | ||
@@ -796,12 +947,12 @@ ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp) | |||
796 | * (from ip_vs_app.c) | 947 | * (from ip_vs_app.c) |
797 | */ | 948 | */ |
798 | #define IP_VS_APP_MAX_PORTS 8 | 949 | #define IP_VS_APP_MAX_PORTS 8 |
799 | extern int register_ip_vs_app(struct ip_vs_app *app); | 950 | extern int register_ip_vs_app(struct net *net, struct ip_vs_app *app); |
800 | extern void unregister_ip_vs_app(struct ip_vs_app *app); | 951 | extern void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app); |
801 | extern int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp); | 952 | extern int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp); |
802 | extern void ip_vs_unbind_app(struct ip_vs_conn *cp); | 953 | extern void ip_vs_unbind_app(struct ip_vs_conn *cp); |
803 | extern int | 954 | extern int register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, |
804 | register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port); | 955 | __u16 proto, __u16 port); |
805 | extern int ip_vs_app_inc_get(struct ip_vs_app *inc); | 956 | extern int ip_vs_app_inc_get(struct ip_vs_app *inc); |
806 | extern void ip_vs_app_inc_put(struct ip_vs_app *inc); | 957 | extern void ip_vs_app_inc_put(struct ip_vs_app *inc); |
807 | 958 | ||
@@ -814,15 +965,27 @@ void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe); | |||
814 | void ip_vs_unbind_pe(struct ip_vs_service *svc); | 965 | void ip_vs_unbind_pe(struct ip_vs_service *svc); |
815 | int register_ip_vs_pe(struct ip_vs_pe *pe); | 966 | int register_ip_vs_pe(struct ip_vs_pe *pe); |
816 | int unregister_ip_vs_pe(struct ip_vs_pe *pe); | 967 | int unregister_ip_vs_pe(struct ip_vs_pe *pe); |
817 | extern struct ip_vs_pe *ip_vs_pe_get(const char *name); | 968 | struct ip_vs_pe *ip_vs_pe_getbyname(const char *name); |
818 | extern void ip_vs_pe_put(struct ip_vs_pe *pe); | 969 | struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name); |
970 | |||
971 | static inline void ip_vs_pe_get(const struct ip_vs_pe *pe) | ||
972 | { | ||
973 | if (pe && pe->module) | ||
974 | __module_get(pe->module); | ||
975 | } | ||
976 | |||
977 | static inline void ip_vs_pe_put(const struct ip_vs_pe *pe) | ||
978 | { | ||
979 | if (pe && pe->module) | ||
980 | module_put(pe->module); | ||
981 | } | ||
819 | 982 | ||
820 | /* | 983 | /* |
821 | * IPVS protocol functions (from ip_vs_proto.c) | 984 | * IPVS protocol functions (from ip_vs_proto.c) |
822 | */ | 985 | */ |
823 | extern int ip_vs_protocol_init(void); | 986 | extern int ip_vs_protocol_init(void); |
824 | extern void ip_vs_protocol_cleanup(void); | 987 | extern void ip_vs_protocol_cleanup(void); |
825 | extern void ip_vs_protocol_timeout_change(int flags); | 988 | extern void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags); |
826 | extern int *ip_vs_create_timeout_table(int *table, int size); | 989 | extern int *ip_vs_create_timeout_table(int *table, int size); |
827 | extern int | 990 | extern int |
828 | ip_vs_set_state_timeout(int *table, int num, const char *const *names, | 991 | ip_vs_set_state_timeout(int *table, int num, const char *const *names, |
@@ -852,26 +1015,21 @@ extern struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name); | |||
852 | extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler); | 1015 | extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler); |
853 | extern struct ip_vs_conn * | 1016 | extern struct ip_vs_conn * |
854 | ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, | 1017 | ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, |
855 | struct ip_vs_protocol *pp, int *ignored); | 1018 | struct ip_vs_proto_data *pd, int *ignored); |
856 | extern int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, | 1019 | extern int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, |
857 | struct ip_vs_protocol *pp); | 1020 | struct ip_vs_proto_data *pd); |
858 | 1021 | ||
859 | 1022 | ||
860 | /* | 1023 | /* |
861 | * IPVS control data and functions (from ip_vs_ctl.c) | 1024 | * IPVS control data and functions (from ip_vs_ctl.c) |
862 | */ | 1025 | */ |
863 | extern int sysctl_ip_vs_cache_bypass; | ||
864 | extern int sysctl_ip_vs_expire_nodest_conn; | ||
865 | extern int sysctl_ip_vs_expire_quiescent_template; | ||
866 | extern int sysctl_ip_vs_sync_threshold[2]; | ||
867 | extern int sysctl_ip_vs_nat_icmp_send; | ||
868 | extern int sysctl_ip_vs_conntrack; | ||
869 | extern int sysctl_ip_vs_snat_reroute; | ||
870 | extern struct ip_vs_stats ip_vs_stats; | 1026 | extern struct ip_vs_stats ip_vs_stats; |
871 | extern const struct ctl_path net_vs_ctl_path[]; | 1027 | extern const struct ctl_path net_vs_ctl_path[]; |
1028 | extern int sysctl_ip_vs_sync_ver; | ||
872 | 1029 | ||
1030 | extern void ip_vs_sync_switch_mode(struct net *net, int mode); | ||
873 | extern struct ip_vs_service * | 1031 | extern struct ip_vs_service * |
874 | ip_vs_service_get(int af, __u32 fwmark, __u16 protocol, | 1032 | ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, |
875 | const union nf_inet_addr *vaddr, __be16 vport); | 1033 | const union nf_inet_addr *vaddr, __be16 vport); |
876 | 1034 | ||
877 | static inline void ip_vs_service_put(struct ip_vs_service *svc) | 1035 | static inline void ip_vs_service_put(struct ip_vs_service *svc) |
@@ -880,7 +1038,7 @@ static inline void ip_vs_service_put(struct ip_vs_service *svc) | |||
880 | } | 1038 | } |
881 | 1039 | ||
882 | extern struct ip_vs_dest * | 1040 | extern struct ip_vs_dest * |
883 | ip_vs_lookup_real_service(int af, __u16 protocol, | 1041 | ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol, |
884 | const union nf_inet_addr *daddr, __be16 dport); | 1042 | const union nf_inet_addr *daddr, __be16 dport); |
885 | 1043 | ||
886 | extern int ip_vs_use_count_inc(void); | 1044 | extern int ip_vs_use_count_inc(void); |
@@ -888,8 +1046,9 @@ extern void ip_vs_use_count_dec(void); | |||
888 | extern int ip_vs_control_init(void); | 1046 | extern int ip_vs_control_init(void); |
889 | extern void ip_vs_control_cleanup(void); | 1047 | extern void ip_vs_control_cleanup(void); |
890 | extern struct ip_vs_dest * | 1048 | extern struct ip_vs_dest * |
891 | ip_vs_find_dest(int af, const union nf_inet_addr *daddr, __be16 dport, | 1049 | ip_vs_find_dest(struct net *net, int af, const union nf_inet_addr *daddr, |
892 | const union nf_inet_addr *vaddr, __be16 vport, __u16 protocol); | 1050 | __be16 dport, const union nf_inet_addr *vaddr, __be16 vport, |
1051 | __u16 protocol, __u32 fwmark); | ||
893 | extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp); | 1052 | extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp); |
894 | 1053 | ||
895 | 1054 | ||
@@ -897,14 +1056,12 @@ extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp); | |||
897 | * IPVS sync daemon data and function prototypes | 1056 | * IPVS sync daemon data and function prototypes |
898 | * (from ip_vs_sync.c) | 1057 | * (from ip_vs_sync.c) |
899 | */ | 1058 | */ |
900 | extern volatile int ip_vs_sync_state; | 1059 | extern int start_sync_thread(struct net *net, int state, char *mcast_ifn, |
901 | extern volatile int ip_vs_master_syncid; | 1060 | __u8 syncid); |
902 | extern volatile int ip_vs_backup_syncid; | 1061 | extern int stop_sync_thread(struct net *net, int state); |
903 | extern char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN]; | 1062 | extern void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp); |
904 | extern char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN]; | 1063 | extern int ip_vs_sync_init(void); |
905 | extern int start_sync_thread(int state, char *mcast_ifn, __u8 syncid); | 1064 | extern void ip_vs_sync_cleanup(void); |
906 | extern int stop_sync_thread(int state); | ||
907 | extern void ip_vs_sync_conn(struct ip_vs_conn *cp); | ||
908 | 1065 | ||
909 | 1066 | ||
910 | /* | 1067 | /* |
@@ -912,8 +1069,8 @@ extern void ip_vs_sync_conn(struct ip_vs_conn *cp); | |||
912 | */ | 1069 | */ |
913 | extern int ip_vs_estimator_init(void); | 1070 | extern int ip_vs_estimator_init(void); |
914 | extern void ip_vs_estimator_cleanup(void); | 1071 | extern void ip_vs_estimator_cleanup(void); |
915 | extern void ip_vs_new_estimator(struct ip_vs_stats *stats); | 1072 | extern void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats); |
916 | extern void ip_vs_kill_estimator(struct ip_vs_stats *stats); | 1073 | extern void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats); |
917 | extern void ip_vs_zero_estimator(struct ip_vs_stats *stats); | 1074 | extern void ip_vs_zero_estimator(struct ip_vs_stats *stats); |
918 | 1075 | ||
919 | /* | 1076 | /* |
@@ -955,11 +1112,13 @@ extern int ip_vs_icmp_xmit_v6 | |||
955 | extern int ip_vs_drop_rate; | 1112 | extern int ip_vs_drop_rate; |
956 | extern int ip_vs_drop_counter; | 1113 | extern int ip_vs_drop_counter; |
957 | 1114 | ||
958 | static __inline__ int ip_vs_todrop(void) | 1115 | static inline int ip_vs_todrop(struct netns_ipvs *ipvs) |
959 | { | 1116 | { |
960 | if (!ip_vs_drop_rate) return 0; | 1117 | if (!ipvs->drop_rate) |
961 | if (--ip_vs_drop_counter > 0) return 0; | 1118 | return 0; |
962 | ip_vs_drop_counter = ip_vs_drop_rate; | 1119 | if (--ipvs->drop_counter > 0) |
1120 | return 0; | ||
1121 | ipvs->drop_counter = ipvs->drop_rate; | ||
963 | return 1; | 1122 | return 1; |
964 | } | 1123 | } |
965 | 1124 | ||
@@ -1047,9 +1206,9 @@ static inline void ip_vs_notrack(struct sk_buff *skb) | |||
1047 | * Netfilter connection tracking | 1206 | * Netfilter connection tracking |
1048 | * (from ip_vs_nfct.c) | 1207 | * (from ip_vs_nfct.c) |
1049 | */ | 1208 | */ |
1050 | static inline int ip_vs_conntrack_enabled(void) | 1209 | static inline int ip_vs_conntrack_enabled(struct netns_ipvs *ipvs) |
1051 | { | 1210 | { |
1052 | return sysctl_ip_vs_conntrack; | 1211 | return ipvs->sysctl_conntrack; |
1053 | } | 1212 | } |
1054 | 1213 | ||
1055 | extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, | 1214 | extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, |
@@ -1062,7 +1221,7 @@ extern void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp); | |||
1062 | 1221 | ||
1063 | #else | 1222 | #else |
1064 | 1223 | ||
1065 | static inline int ip_vs_conntrack_enabled(void) | 1224 | static inline int ip_vs_conntrack_enabled(struct netns_ipvs *ipvs) |
1066 | { | 1225 | { |
1067 | return 0; | 1226 | return 0; |
1068 | } | 1227 | } |
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 1bf812b21fb7..b3b4a34cb2cc 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <net/netns/conntrack.h> | 20 | #include <net/netns/conntrack.h> |
21 | #endif | 21 | #endif |
22 | #include <net/netns/xfrm.h> | 22 | #include <net/netns/xfrm.h> |
23 | #include <net/netns/ip_vs.h> | ||
23 | 24 | ||
24 | struct proc_dir_entry; | 25 | struct proc_dir_entry; |
25 | struct net_device; | 26 | struct net_device; |
@@ -94,6 +95,7 @@ struct net { | |||
94 | #ifdef CONFIG_XFRM | 95 | #ifdef CONFIG_XFRM |
95 | struct netns_xfrm xfrm; | 96 | struct netns_xfrm xfrm; |
96 | #endif | 97 | #endif |
98 | struct netns_ipvs *ipvs; | ||
97 | }; | 99 | }; |
98 | 100 | ||
99 | 101 | ||
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index d85cff10e169..d0d13378991e 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h | |||
@@ -50,11 +50,24 @@ union nf_conntrack_expect_proto { | |||
50 | /* per conntrack: application helper private data */ | 50 | /* per conntrack: application helper private data */ |
51 | union nf_conntrack_help { | 51 | union nf_conntrack_help { |
52 | /* insert conntrack helper private data (master) here */ | 52 | /* insert conntrack helper private data (master) here */ |
53 | #if defined(CONFIG_NF_CONNTRACK_FTP) || defined(CONFIG_NF_CONNTRACK_FTP_MODULE) | ||
53 | struct nf_ct_ftp_master ct_ftp_info; | 54 | struct nf_ct_ftp_master ct_ftp_info; |
55 | #endif | ||
56 | #if defined(CONFIG_NF_CONNTRACK_PPTP) || \ | ||
57 | defined(CONFIG_NF_CONNTRACK_PPTP_MODULE) | ||
54 | struct nf_ct_pptp_master ct_pptp_info; | 58 | struct nf_ct_pptp_master ct_pptp_info; |
59 | #endif | ||
60 | #if defined(CONFIG_NF_CONNTRACK_H323) || \ | ||
61 | defined(CONFIG_NF_CONNTRACK_H323_MODULE) | ||
55 | struct nf_ct_h323_master ct_h323_info; | 62 | struct nf_ct_h323_master ct_h323_info; |
63 | #endif | ||
64 | #if defined(CONFIG_NF_CONNTRACK_SANE) || \ | ||
65 | defined(CONFIG_NF_CONNTRACK_SANE_MODULE) | ||
56 | struct nf_ct_sane_master ct_sane_info; | 66 | struct nf_ct_sane_master ct_sane_info; |
67 | #endif | ||
68 | #if defined(CONFIG_NF_CONNTRACK_SIP) || defined(CONFIG_NF_CONNTRACK_SIP_MODULE) | ||
57 | struct nf_ct_sip_master ct_sip_info; | 69 | struct nf_ct_sip_master ct_sip_info; |
70 | #endif | ||
58 | }; | 71 | }; |
59 | 72 | ||
60 | #include <linux/types.h> | 73 | #include <linux/types.h> |
@@ -116,14 +129,14 @@ struct nf_conn { | |||
116 | u_int32_t secmark; | 129 | u_int32_t secmark; |
117 | #endif | 130 | #endif |
118 | 131 | ||
119 | /* Storage reserved for other modules: */ | ||
120 | union nf_conntrack_proto proto; | ||
121 | |||
122 | /* Extensions */ | 132 | /* Extensions */ |
123 | struct nf_ct_ext *ext; | 133 | struct nf_ct_ext *ext; |
124 | #ifdef CONFIG_NET_NS | 134 | #ifdef CONFIG_NET_NS |
125 | struct net *ct_net; | 135 | struct net *ct_net; |
126 | #endif | 136 | #endif |
137 | |||
138 | /* Storage reserved for other modules, must be the last member */ | ||
139 | union nf_conntrack_proto proto; | ||
127 | }; | 140 | }; |
128 | 141 | ||
129 | static inline struct nf_conn * | 142 | static inline struct nf_conn * |
@@ -189,9 +202,9 @@ extern void nf_ct_l3proto_module_put(unsigned short l3proto); | |||
189 | * Allocate a hashtable of hlist_head (if nulls == 0), | 202 | * Allocate a hashtable of hlist_head (if nulls == 0), |
190 | * or hlist_nulls_head (if nulls == 1) | 203 | * or hlist_nulls_head (if nulls == 1) |
191 | */ | 204 | */ |
192 | extern void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int nulls); | 205 | extern void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls); |
193 | 206 | ||
194 | extern void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size); | 207 | extern void nf_ct_free_hashtable(void *hash, unsigned int size); |
195 | 208 | ||
196 | extern struct nf_conntrack_tuple_hash * | 209 | extern struct nf_conntrack_tuple_hash * |
197 | __nf_conntrack_find(struct net *net, u16 zone, | 210 | __nf_conntrack_find(struct net *net, u16 zone, |
diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 96ba5f7dcab6..8fdb04b8cce0 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h | |||
@@ -23,12 +23,17 @@ struct nf_conntrack_ecache { | |||
23 | static inline struct nf_conntrack_ecache * | 23 | static inline struct nf_conntrack_ecache * |
24 | nf_ct_ecache_find(const struct nf_conn *ct) | 24 | nf_ct_ecache_find(const struct nf_conn *ct) |
25 | { | 25 | { |
26 | #ifdef CONFIG_NF_CONNTRACK_EVENTS | ||
26 | return nf_ct_ext_find(ct, NF_CT_EXT_ECACHE); | 27 | return nf_ct_ext_find(ct, NF_CT_EXT_ECACHE); |
28 | #else | ||
29 | return NULL; | ||
30 | #endif | ||
27 | } | 31 | } |
28 | 32 | ||
29 | static inline struct nf_conntrack_ecache * | 33 | static inline struct nf_conntrack_ecache * |
30 | nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp) | 34 | nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp) |
31 | { | 35 | { |
36 | #ifdef CONFIG_NF_CONNTRACK_EVENTS | ||
32 | struct net *net = nf_ct_net(ct); | 37 | struct net *net = nf_ct_net(ct); |
33 | struct nf_conntrack_ecache *e; | 38 | struct nf_conntrack_ecache *e; |
34 | 39 | ||
@@ -45,6 +50,9 @@ nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp) | |||
45 | e->expmask = expmask; | 50 | e->expmask = expmask; |
46 | } | 51 | } |
47 | return e; | 52 | return e; |
53 | #else | ||
54 | return NULL; | ||
55 | #endif | ||
48 | }; | 56 | }; |
49 | 57 | ||
50 | #ifdef CONFIG_NF_CONNTRACK_EVENTS | 58 | #ifdef CONFIG_NF_CONNTRACK_EVENTS |
@@ -59,7 +67,7 @@ struct nf_ct_event_notifier { | |||
59 | int (*fcn)(unsigned int events, struct nf_ct_event *item); | 67 | int (*fcn)(unsigned int events, struct nf_ct_event *item); |
60 | }; | 68 | }; |
61 | 69 | ||
62 | extern struct nf_ct_event_notifier *nf_conntrack_event_cb; | 70 | extern struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb; |
63 | extern int nf_conntrack_register_notifier(struct nf_ct_event_notifier *nb); | 71 | extern int nf_conntrack_register_notifier(struct nf_ct_event_notifier *nb); |
64 | extern void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *nb); | 72 | extern void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *nb); |
65 | 73 | ||
@@ -159,7 +167,7 @@ struct nf_exp_event_notifier { | |||
159 | int (*fcn)(unsigned int events, struct nf_exp_event *item); | 167 | int (*fcn)(unsigned int events, struct nf_exp_event *item); |
160 | }; | 168 | }; |
161 | 169 | ||
162 | extern struct nf_exp_event_notifier *nf_expect_event_cb; | 170 | extern struct nf_exp_event_notifier __rcu *nf_expect_event_cb; |
163 | extern int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *nb); | 171 | extern int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *nb); |
164 | extern void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *nb); | 172 | extern void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *nb); |
165 | 173 | ||
diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index 0772d296dfdb..2dcf31703acb 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h | |||
@@ -7,10 +7,19 @@ | |||
7 | 7 | ||
8 | enum nf_ct_ext_id { | 8 | enum nf_ct_ext_id { |
9 | NF_CT_EXT_HELPER, | 9 | NF_CT_EXT_HELPER, |
10 | #if defined(CONFIG_NF_NAT) || defined(CONFIG_NF_NAT_MODULE) | ||
10 | NF_CT_EXT_NAT, | 11 | NF_CT_EXT_NAT, |
12 | #endif | ||
11 | NF_CT_EXT_ACCT, | 13 | NF_CT_EXT_ACCT, |
14 | #ifdef CONFIG_NF_CONNTRACK_EVENTS | ||
12 | NF_CT_EXT_ECACHE, | 15 | NF_CT_EXT_ECACHE, |
16 | #endif | ||
17 | #ifdef CONFIG_NF_CONNTRACK_ZONES | ||
13 | NF_CT_EXT_ZONE, | 18 | NF_CT_EXT_ZONE, |
19 | #endif | ||
20 | #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP | ||
21 | NF_CT_EXT_TSTAMP, | ||
22 | #endif | ||
14 | NF_CT_EXT_NUM, | 23 | NF_CT_EXT_NUM, |
15 | }; | 24 | }; |
16 | 25 | ||
@@ -19,6 +28,7 @@ enum nf_ct_ext_id { | |||
19 | #define NF_CT_EXT_ACCT_TYPE struct nf_conn_counter | 28 | #define NF_CT_EXT_ACCT_TYPE struct nf_conn_counter |
20 | #define NF_CT_EXT_ECACHE_TYPE struct nf_conntrack_ecache | 29 | #define NF_CT_EXT_ECACHE_TYPE struct nf_conntrack_ecache |
21 | #define NF_CT_EXT_ZONE_TYPE struct nf_conntrack_zone | 30 | #define NF_CT_EXT_ZONE_TYPE struct nf_conntrack_zone |
31 | #define NF_CT_EXT_TSTAMP_TYPE struct nf_conn_tstamp | ||
22 | 32 | ||
23 | /* Extensions: optional stuff which isn't permanently in struct. */ | 33 | /* Extensions: optional stuff which isn't permanently in struct. */ |
24 | struct nf_ct_ext { | 34 | struct nf_ct_ext { |
diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 32c305dbdab6..f1c1311adc2c 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h | |||
@@ -63,4 +63,10 @@ static inline struct nf_conn_help *nfct_help(const struct nf_conn *ct) | |||
63 | extern int nf_conntrack_helper_init(void); | 63 | extern int nf_conntrack_helper_init(void); |
64 | extern void nf_conntrack_helper_fini(void); | 64 | extern void nf_conntrack_helper_fini(void); |
65 | 65 | ||
66 | extern int nf_conntrack_broadcast_help(struct sk_buff *skb, | ||
67 | unsigned int protoff, | ||
68 | struct nf_conn *ct, | ||
69 | enum ip_conntrack_info ctinfo, | ||
70 | unsigned int timeout); | ||
71 | |||
66 | #endif /*_NF_CONNTRACK_HELPER_H*/ | 72 | #endif /*_NF_CONNTRACK_HELPER_H*/ |
diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index a7547611e8f1..e8010f445ae1 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h | |||
@@ -73,7 +73,7 @@ struct nf_conntrack_l3proto { | |||
73 | struct module *me; | 73 | struct module *me; |
74 | }; | 74 | }; |
75 | 75 | ||
76 | extern struct nf_conntrack_l3proto *nf_ct_l3protos[AF_MAX]; | 76 | extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX]; |
77 | 77 | ||
78 | /* Protocol registration. */ | 78 | /* Protocol registration. */ |
79 | extern int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto); | 79 | extern int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto); |
diff --git a/include/net/netfilter/nf_conntrack_timestamp.h b/include/net/netfilter/nf_conntrack_timestamp.h new file mode 100644 index 000000000000..fc9c82b1f06b --- /dev/null +++ b/include/net/netfilter/nf_conntrack_timestamp.h | |||
@@ -0,0 +1,65 @@ | |||
1 | #ifndef _NF_CONNTRACK_TSTAMP_H | ||
2 | #define _NF_CONNTRACK_TSTAMP_H | ||
3 | |||
4 | #include <net/net_namespace.h> | ||
5 | #include <linux/netfilter/nf_conntrack_common.h> | ||
6 | #include <linux/netfilter/nf_conntrack_tuple_common.h> | ||
7 | #include <net/netfilter/nf_conntrack.h> | ||
8 | #include <net/netfilter/nf_conntrack_extend.h> | ||
9 | |||
10 | struct nf_conn_tstamp { | ||
11 | u_int64_t start; | ||
12 | u_int64_t stop; | ||
13 | }; | ||
14 | |||
15 | static inline | ||
16 | struct nf_conn_tstamp *nf_conn_tstamp_find(const struct nf_conn *ct) | ||
17 | { | ||
18 | #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP | ||
19 | return nf_ct_ext_find(ct, NF_CT_EXT_TSTAMP); | ||
20 | #else | ||
21 | return NULL; | ||
22 | #endif | ||
23 | } | ||
24 | |||
25 | static inline | ||
26 | struct nf_conn_tstamp *nf_ct_tstamp_ext_add(struct nf_conn *ct, gfp_t gfp) | ||
27 | { | ||
28 | #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP | ||
29 | struct net *net = nf_ct_net(ct); | ||
30 | |||
31 | if (!net->ct.sysctl_tstamp) | ||
32 | return NULL; | ||
33 | |||
34 | return nf_ct_ext_add(ct, NF_CT_EXT_TSTAMP, gfp); | ||
35 | #else | ||
36 | return NULL; | ||
37 | #endif | ||
38 | }; | ||
39 | |||
40 | static inline bool nf_ct_tstamp_enabled(struct net *net) | ||
41 | { | ||
42 | return net->ct.sysctl_tstamp != 0; | ||
43 | } | ||
44 | |||
45 | static inline void nf_ct_set_tstamp(struct net *net, bool enable) | ||
46 | { | ||
47 | net->ct.sysctl_tstamp = enable; | ||
48 | } | ||
49 | |||
50 | #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP | ||
51 | extern int nf_conntrack_tstamp_init(struct net *net); | ||
52 | extern void nf_conntrack_tstamp_fini(struct net *net); | ||
53 | #else | ||
54 | static inline int nf_conntrack_tstamp_init(struct net *net) | ||
55 | { | ||
56 | return 0; | ||
57 | } | ||
58 | |||
59 | static inline void nf_conntrack_tstamp_fini(struct net *net) | ||
60 | { | ||
61 | return; | ||
62 | } | ||
63 | #endif /* CONFIG_NF_CONNTRACK_TIMESTAMP */ | ||
64 | |||
65 | #endif /* _NF_CONNTRACK_TSTAMP_H */ | ||
diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index f5f09f032a90..aff80b190c12 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h | |||
@@ -56,7 +56,9 @@ struct nf_nat_multi_range_compat { | |||
56 | /* per conntrack: nat application helper private data */ | 56 | /* per conntrack: nat application helper private data */ |
57 | union nf_conntrack_nat_help { | 57 | union nf_conntrack_nat_help { |
58 | /* insert nat helper private data here */ | 58 | /* insert nat helper private data here */ |
59 | #if defined(CONFIG_NF_NAT_PPTP) || defined(CONFIG_NF_NAT_PPTP_MODULE) | ||
59 | struct nf_nat_pptp nat_pptp_info; | 60 | struct nf_nat_pptp nat_pptp_info; |
61 | #endif | ||
60 | }; | 62 | }; |
61 | 63 | ||
62 | struct nf_conn; | 64 | struct nf_conn; |
@@ -84,7 +86,11 @@ extern int nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple, | |||
84 | 86 | ||
85 | static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct) | 87 | static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct) |
86 | { | 88 | { |
89 | #if defined(CONFIG_NF_NAT) || defined(CONFIG_NF_NAT_MODULE) | ||
87 | return nf_ct_ext_find(ct, NF_CT_EXT_NAT); | 90 | return nf_ct_ext_find(ct, NF_CT_EXT_NAT); |
91 | #else | ||
92 | return NULL; | ||
93 | #endif | ||
88 | } | 94 | } |
89 | 95 | ||
90 | #else /* !__KERNEL__: iptables wants this to compile. */ | 96 | #else /* !__KERNEL__: iptables wants this to compile. */ |
diff --git a/include/net/netfilter/nf_nat_core.h b/include/net/netfilter/nf_nat_core.h index 33602ab66190..3dc7b98effeb 100644 --- a/include/net/netfilter/nf_nat_core.h +++ b/include/net/netfilter/nf_nat_core.h | |||
@@ -21,9 +21,9 @@ static inline int nf_nat_initialized(struct nf_conn *ct, | |||
21 | enum nf_nat_manip_type manip) | 21 | enum nf_nat_manip_type manip) |
22 | { | 22 | { |
23 | if (manip == IP_NAT_MANIP_SRC) | 23 | if (manip == IP_NAT_MANIP_SRC) |
24 | return test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status); | 24 | return ct->status & IPS_SRC_NAT_DONE; |
25 | else | 25 | else |
26 | return test_bit(IPS_DST_NAT_DONE_BIT, &ct->status); | 26 | return ct->status & IPS_DST_NAT_DONE; |
27 | } | 27 | } |
28 | 28 | ||
29 | struct nlattr; | 29 | struct nlattr; |
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index d4958d4c6574..341eb089349e 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h | |||
@@ -21,15 +21,15 @@ struct netns_ct { | |||
21 | int sysctl_events; | 21 | int sysctl_events; |
22 | unsigned int sysctl_events_retry_timeout; | 22 | unsigned int sysctl_events_retry_timeout; |
23 | int sysctl_acct; | 23 | int sysctl_acct; |
24 | int sysctl_tstamp; | ||
24 | int sysctl_checksum; | 25 | int sysctl_checksum; |
25 | unsigned int sysctl_log_invalid; /* Log invalid packets */ | 26 | unsigned int sysctl_log_invalid; /* Log invalid packets */ |
26 | #ifdef CONFIG_SYSCTL | 27 | #ifdef CONFIG_SYSCTL |
27 | struct ctl_table_header *sysctl_header; | 28 | struct ctl_table_header *sysctl_header; |
28 | struct ctl_table_header *acct_sysctl_header; | 29 | struct ctl_table_header *acct_sysctl_header; |
30 | struct ctl_table_header *tstamp_sysctl_header; | ||
29 | struct ctl_table_header *event_sysctl_header; | 31 | struct ctl_table_header *event_sysctl_header; |
30 | #endif | 32 | #endif |
31 | int hash_vmalloc; | ||
32 | int expect_vmalloc; | ||
33 | char *slabname; | 33 | char *slabname; |
34 | }; | 34 | }; |
35 | #endif | 35 | #endif |
diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h new file mode 100644 index 000000000000..259ebac904bf --- /dev/null +++ b/include/net/netns/ip_vs.h | |||
@@ -0,0 +1,143 @@ | |||
1 | /* | ||
2 | * IP Virtual Server | ||
3 | * Data structure for network namespace | |
4 | * | ||
5 | */ | ||
6 | |||
7 | #ifndef IP_VS_H_ | ||
8 | #define IP_VS_H_ | ||
9 | |||
10 | #include <linux/list.h> | ||
11 | #include <linux/mutex.h> | ||
12 | #include <linux/list_nulls.h> | ||
13 | #include <linux/ip_vs.h> | ||
14 | #include <asm/atomic.h> | ||
15 | #include <linux/in.h> | ||
16 | |||
17 | struct ip_vs_stats; | ||
18 | struct ip_vs_sync_buff; | ||
19 | struct ctl_table_header; | ||
20 | |||
21 | struct netns_ipvs { | ||
22 | int gen; /* Generation */ | ||
23 | /* | ||
24 | * Hash table: for real service lookups | ||
25 | */ | ||
26 | #define IP_VS_RTAB_BITS 4 | ||
27 | #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS) | ||
28 | #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1) | ||
29 | |||
30 | struct list_head rs_table[IP_VS_RTAB_SIZE]; | ||
31 | /* ip_vs_app */ | ||
32 | struct list_head app_list; | ||
33 | struct mutex app_mutex; | ||
34 | struct lock_class_key app_key; /* mutex debugging */ | |
35 | |||
36 | /* ip_vs_proto */ | ||
37 | #define IP_VS_PROTO_TAB_SIZE 32 /* must be power of 2 */ | ||
38 | struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE]; | ||
39 | /* ip_vs_proto_tcp */ | ||
40 | #ifdef CONFIG_IP_VS_PROTO_TCP | ||
41 | #define TCP_APP_TAB_BITS 4 | ||
42 | #define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS) | ||
43 | #define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1) | ||
44 | struct list_head tcp_apps[TCP_APP_TAB_SIZE]; | ||
45 | spinlock_t tcp_app_lock; | ||
46 | #endif | ||
47 | /* ip_vs_proto_udp */ | ||
48 | #ifdef CONFIG_IP_VS_PROTO_UDP | ||
49 | #define UDP_APP_TAB_BITS 4 | ||
50 | #define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS) | ||
51 | #define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1) | ||
52 | struct list_head udp_apps[UDP_APP_TAB_SIZE]; | ||
53 | spinlock_t udp_app_lock; | ||
54 | #endif | ||
55 | /* ip_vs_proto_sctp */ | ||
56 | #ifdef CONFIG_IP_VS_PROTO_SCTP | ||
57 | #define SCTP_APP_TAB_BITS 4 | ||
58 | #define SCTP_APP_TAB_SIZE (1 << SCTP_APP_TAB_BITS) | ||
59 | #define SCTP_APP_TAB_MASK (SCTP_APP_TAB_SIZE - 1) | ||
60 | /* Hash table for SCTP application incarnations */ | ||
61 | struct list_head sctp_apps[SCTP_APP_TAB_SIZE]; | ||
62 | spinlock_t sctp_app_lock; | ||
63 | #endif | ||
64 | /* ip_vs_conn */ | ||
65 | atomic_t conn_count; /* connection counter */ | ||
66 | |||
67 | /* ip_vs_ctl */ | ||
68 | struct ip_vs_stats *tot_stats; /* Statistics & est. */ | ||
69 | struct ip_vs_cpu_stats __percpu *cpustats; /* Stats per cpu */ | ||
70 | seqcount_t *ustats_seq; /* u64 read retry */ | ||
71 | |||
72 | int num_services; /* no of virtual services */ | ||
73 | /* 1/rate drop and drop-entry variables */ | ||
74 | struct delayed_work defense_work; /* Work handler */ | ||
75 | int drop_rate; | ||
76 | int drop_counter; | ||
77 | atomic_t dropentry; | ||
78 | /* locks in ctl.c */ | ||
79 | spinlock_t dropentry_lock; /* drop entry handling */ | ||
80 | spinlock_t droppacket_lock; /* drop packet handling */ | ||
81 | spinlock_t securetcp_lock; /* state and timeout tables */ | ||
82 | rwlock_t rs_lock; /* real services table */ | ||
83 | /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */ | ||
84 | struct lock_class_key ctl_key; /* ctl_mutex debugging */ | |
85 | /* Trash for destinations */ | ||
86 | struct list_head dest_trash; | ||
87 | /* Service counters */ | ||
88 | atomic_t ftpsvc_counter; | ||
89 | atomic_t nullsvc_counter; | ||
90 | |||
91 | /* sys-ctl struct */ | ||
92 | struct ctl_table_header *sysctl_hdr; | ||
93 | struct ctl_table *sysctl_tbl; | ||
94 | /* sysctl variables */ | ||
95 | int sysctl_amemthresh; | ||
96 | int sysctl_am_droprate; | ||
97 | int sysctl_drop_entry; | ||
98 | int sysctl_drop_packet; | ||
99 | int sysctl_secure_tcp; | ||
100 | #ifdef CONFIG_IP_VS_NFCT | ||
101 | int sysctl_conntrack; | ||
102 | #endif | ||
103 | int sysctl_snat_reroute; | ||
104 | int sysctl_sync_ver; | ||
105 | int sysctl_cache_bypass; | ||
106 | int sysctl_expire_nodest_conn; | ||
107 | int sysctl_expire_quiescent_template; | ||
108 | int sysctl_sync_threshold[2]; | ||
109 | int sysctl_nat_icmp_send; | ||
110 | |||
111 | /* ip_vs_lblc */ | ||
112 | int sysctl_lblc_expiration; | ||
113 | struct ctl_table_header *lblc_ctl_header; | ||
114 | struct ctl_table *lblc_ctl_table; | ||
115 | /* ip_vs_lblcr */ | ||
116 | int sysctl_lblcr_expiration; | ||
117 | struct ctl_table_header *lblcr_ctl_header; | ||
118 | struct ctl_table *lblcr_ctl_table; | ||
119 | /* ip_vs_est */ | ||
120 | struct list_head est_list; /* estimator list */ | ||
121 | spinlock_t est_lock; | ||
122 | struct timer_list est_timer; /* Estimation timer */ | ||
123 | /* ip_vs_sync */ | ||
124 | struct list_head sync_queue; | ||
125 | spinlock_t sync_lock; | ||
126 | struct ip_vs_sync_buff *sync_buff; | ||
127 | spinlock_t sync_buff_lock; | ||
128 | struct sockaddr_in sync_mcast_addr; | ||
129 | struct task_struct *master_thread; | ||
130 | struct task_struct *backup_thread; | ||
131 | int send_mesg_maxlen; | ||
132 | int recv_mesg_maxlen; | ||
133 | volatile int sync_state; | ||
134 | volatile int master_syncid; | ||
135 | volatile int backup_syncid; | ||
136 | /* multicast interface name */ | ||
137 | char master_mcast_ifn[IP_VS_IFNAME_MAXLEN]; | ||
138 | char backup_mcast_ifn[IP_VS_IFNAME_MAXLEN]; | ||
139 | /* net name space ptr */ | ||
140 | struct net *net; /* Needed by timer routines */ | ||
141 | }; | ||
142 | |||
143 | #endif /* IP_VS_H_ */ | ||
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index d68c3f121774..e2e2ef57eca2 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h | |||
@@ -43,7 +43,6 @@ struct netns_ipv4 { | |||
43 | struct xt_table *nat_table; | 43 | struct xt_table *nat_table; |
44 | struct hlist_head *nat_bysource; | 44 | struct hlist_head *nat_bysource; |
45 | unsigned int nat_htable_size; | 45 | unsigned int nat_htable_size; |
46 | int nat_vmalloced; | ||
47 | #endif | 46 | #endif |
48 | 47 | ||
49 | int sysctl_icmp_echo_ignore_all; | 48 | int sysctl_icmp_echo_ignore_all; |
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 160a407c1963..16626a04cb03 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h | |||
@@ -31,10 +31,12 @@ enum qdisc_state_t { | |||
31 | * following bits are only changed while qdisc lock is held | 31 | * following bits are only changed while qdisc lock is held |
32 | */ | 32 | */ |
33 | enum qdisc___state_t { | 33 | enum qdisc___state_t { |
34 | __QDISC___STATE_RUNNING, | 34 | __QDISC___STATE_RUNNING = 1, |
35 | __QDISC___STATE_THROTTLED = 2, | ||
35 | }; | 36 | }; |
36 | 37 | ||
37 | struct qdisc_size_table { | 38 | struct qdisc_size_table { |
39 | struct rcu_head rcu; | ||
38 | struct list_head list; | 40 | struct list_head list; |
39 | struct tc_sizespec szopts; | 41 | struct tc_sizespec szopts; |
40 | int refcnt; | 42 | int refcnt; |
@@ -46,14 +48,13 @@ struct Qdisc { | |||
46 | struct sk_buff * (*dequeue)(struct Qdisc *dev); | 48 | struct sk_buff * (*dequeue)(struct Qdisc *dev); |
47 | unsigned flags; | 49 | unsigned flags; |
48 | #define TCQ_F_BUILTIN 1 | 50 | #define TCQ_F_BUILTIN 1 |
49 | #define TCQ_F_THROTTLED 2 | 51 | #define TCQ_F_INGRESS 2 |
50 | #define TCQ_F_INGRESS 4 | 52 | #define TCQ_F_CAN_BYPASS 4 |
51 | #define TCQ_F_CAN_BYPASS 8 | 53 | #define TCQ_F_MQROOT 8 |
52 | #define TCQ_F_MQROOT 16 | ||
53 | #define TCQ_F_WARN_NONWC (1 << 16) | 54 | #define TCQ_F_WARN_NONWC (1 << 16) |
54 | int padded; | 55 | int padded; |
55 | struct Qdisc_ops *ops; | 56 | struct Qdisc_ops *ops; |
56 | struct qdisc_size_table *stab; | 57 | struct qdisc_size_table __rcu *stab; |
57 | struct list_head list; | 58 | struct list_head list; |
58 | u32 handle; | 59 | u32 handle; |
59 | u32 parent; | 60 | u32 parent; |
@@ -78,25 +79,43 @@ struct Qdisc { | |||
78 | unsigned long state; | 79 | unsigned long state; |
79 | struct sk_buff_head q; | 80 | struct sk_buff_head q; |
80 | struct gnet_stats_basic_packed bstats; | 81 | struct gnet_stats_basic_packed bstats; |
81 | unsigned long __state; | 82 | unsigned int __state; |
82 | struct gnet_stats_queue qstats; | 83 | struct gnet_stats_queue qstats; |
83 | struct rcu_head rcu_head; | 84 | struct rcu_head rcu_head; |
84 | spinlock_t busylock; | 85 | spinlock_t busylock; |
85 | }; | 86 | }; |
86 | 87 | ||
87 | static inline bool qdisc_is_running(struct Qdisc *qdisc) | 88 | static inline bool qdisc_is_running(const struct Qdisc *qdisc) |
88 | { | 89 | { |
89 | return test_bit(__QDISC___STATE_RUNNING, &qdisc->__state); | 90 | return (qdisc->__state & __QDISC___STATE_RUNNING) ? true : false; |
90 | } | 91 | } |
91 | 92 | ||
92 | static inline bool qdisc_run_begin(struct Qdisc *qdisc) | 93 | static inline bool qdisc_run_begin(struct Qdisc *qdisc) |
93 | { | 94 | { |
94 | return !__test_and_set_bit(__QDISC___STATE_RUNNING, &qdisc->__state); | 95 | if (qdisc_is_running(qdisc)) |
96 | return false; | ||
97 | qdisc->__state |= __QDISC___STATE_RUNNING; | ||
98 | return true; | ||
95 | } | 99 | } |
96 | 100 | ||
97 | static inline void qdisc_run_end(struct Qdisc *qdisc) | 101 | static inline void qdisc_run_end(struct Qdisc *qdisc) |
98 | { | 102 | { |
99 | __clear_bit(__QDISC___STATE_RUNNING, &qdisc->__state); | 103 | qdisc->__state &= ~__QDISC___STATE_RUNNING; |
104 | } | ||
105 | |||
106 | static inline bool qdisc_is_throttled(const struct Qdisc *qdisc) | ||
107 | { | ||
108 | return (qdisc->__state & __QDISC___STATE_THROTTLED) ? true : false; | ||
109 | } | ||
110 | |||
111 | static inline void qdisc_throttled(struct Qdisc *qdisc) | ||
112 | { | ||
113 | qdisc->__state |= __QDISC___STATE_THROTTLED; | ||
114 | } | ||
115 | |||
116 | static inline void qdisc_unthrottled(struct Qdisc *qdisc) | ||
117 | { | ||
118 | qdisc->__state &= ~__QDISC___STATE_THROTTLED; | ||
100 | } | 119 | } |
101 | 120 | ||
102 | struct Qdisc_class_ops { | 121 | struct Qdisc_class_ops { |
@@ -331,8 +350,8 @@ extern struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, | |||
331 | struct Qdisc_ops *ops); | 350 | struct Qdisc_ops *ops); |
332 | extern struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue, | 351 | extern struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue, |
333 | struct Qdisc_ops *ops, u32 parentid); | 352 | struct Qdisc_ops *ops, u32 parentid); |
334 | extern void qdisc_calculate_pkt_len(struct sk_buff *skb, | 353 | extern void __qdisc_calculate_pkt_len(struct sk_buff *skb, |
335 | struct qdisc_size_table *stab); | 354 | const struct qdisc_size_table *stab); |
336 | extern void tcf_destroy(struct tcf_proto *tp); | 355 | extern void tcf_destroy(struct tcf_proto *tp); |
337 | extern void tcf_destroy_chain(struct tcf_proto **fl); | 356 | extern void tcf_destroy_chain(struct tcf_proto **fl); |
338 | 357 | ||
@@ -411,12 +430,20 @@ enum net_xmit_qdisc_t { | |||
411 | #define net_xmit_drop_count(e) (1) | 430 | #define net_xmit_drop_count(e) (1) |
412 | #endif | 431 | #endif |
413 | 432 | ||
414 | static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch) | 433 | static inline void qdisc_calculate_pkt_len(struct sk_buff *skb, |
434 | const struct Qdisc *sch) | ||
415 | { | 435 | { |
416 | #ifdef CONFIG_NET_SCHED | 436 | #ifdef CONFIG_NET_SCHED |
417 | if (sch->stab) | 437 | struct qdisc_size_table *stab = rcu_dereference_bh(sch->stab); |
418 | qdisc_calculate_pkt_len(skb, sch->stab); | 438 | |
439 | if (stab) | ||
440 | __qdisc_calculate_pkt_len(skb, stab); | ||
419 | #endif | 441 | #endif |
442 | } | ||
443 | |||
444 | static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch) | ||
445 | { | ||
446 | qdisc_calculate_pkt_len(skb, sch); | ||
420 | return sch->enqueue(skb, sch); | 447 | return sch->enqueue(skb, sch); |
421 | } | 448 | } |
422 | 449 | ||
diff --git a/include/net/sock.h b/include/net/sock.h index d884d268c704..ba6465bf7c7a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h | |||
@@ -1189,7 +1189,7 @@ extern void sk_filter_release_rcu(struct rcu_head *rcu); | |||
1189 | static inline void sk_filter_release(struct sk_filter *fp) | 1189 | static inline void sk_filter_release(struct sk_filter *fp) |
1190 | { | 1190 | { |
1191 | if (atomic_dec_and_test(&fp->refcnt)) | 1191 | if (atomic_dec_and_test(&fp->refcnt)) |
1192 | call_rcu_bh(&fp->rcu, sk_filter_release_rcu); | 1192 | call_rcu(&fp->rcu, sk_filter_release_rcu); |
1193 | } | 1193 | } |
1194 | 1194 | ||
1195 | static inline void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp) | 1195 | static inline void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp) |
diff --git a/kernel/audit.c b/kernel/audit.c index e4956244ae50..162e88e33bc9 100644 --- a/kernel/audit.c +++ b/kernel/audit.c | |||
@@ -74,6 +74,8 @@ static int audit_initialized; | |||
74 | int audit_enabled; | 74 | int audit_enabled; |
75 | int audit_ever_enabled; | 75 | int audit_ever_enabled; |
76 | 76 | ||
77 | EXPORT_SYMBOL_GPL(audit_enabled); | ||
78 | |||
77 | /* Default state when kernel boots without any parameters. */ | 79 | /* Default state when kernel boots without any parameters. */ |
78 | static int audit_default; | 80 | static int audit_default; |
79 | 81 | ||
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 17c5ba7551a5..29a54ccd213d 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c | |||
@@ -59,7 +59,6 @@ | |||
59 | * safely advertise a maxsize | 59 | * safely advertise a maxsize |
60 | * of 64k */ | 60 | * of 64k */ |
61 | 61 | ||
62 | #define P9_RDMA_MAX_SGE (P9_RDMA_MAXSIZE >> PAGE_SHIFT) | ||
63 | /** | 62 | /** |
64 | * struct p9_trans_rdma - RDMA transport instance | 63 | * struct p9_trans_rdma - RDMA transport instance |
65 | * | 64 | * |
diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c index 50a46afc2bcc..2ed0056a39a8 100644 --- a/net/bridge/netfilter/ebt_ip6.c +++ b/net/bridge/netfilter/ebt_ip6.c | |||
@@ -22,9 +22,15 @@ | |||
22 | #include <linux/netfilter_bridge/ebtables.h> | 22 | #include <linux/netfilter_bridge/ebtables.h> |
23 | #include <linux/netfilter_bridge/ebt_ip6.h> | 23 | #include <linux/netfilter_bridge/ebt_ip6.h> |
24 | 24 | ||
25 | struct tcpudphdr { | 25 | union pkthdr { |
26 | __be16 src; | 26 | struct { |
27 | __be16 dst; | 27 | __be16 src; |
28 | __be16 dst; | ||
29 | } tcpudphdr; | ||
30 | struct { | ||
31 | u8 type; | ||
32 | u8 code; | ||
33 | } icmphdr; | ||
28 | }; | 34 | }; |
29 | 35 | ||
30 | static bool | 36 | static bool |
@@ -33,8 +39,8 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par) | |||
33 | const struct ebt_ip6_info *info = par->matchinfo; | 39 | const struct ebt_ip6_info *info = par->matchinfo; |
34 | const struct ipv6hdr *ih6; | 40 | const struct ipv6hdr *ih6; |
35 | struct ipv6hdr _ip6h; | 41 | struct ipv6hdr _ip6h; |
36 | const struct tcpudphdr *pptr; | 42 | const union pkthdr *pptr; |
37 | struct tcpudphdr _ports; | 43 | union pkthdr _pkthdr; |
38 | 44 | ||
39 | ih6 = skb_header_pointer(skb, 0, sizeof(_ip6h), &_ip6h); | 45 | ih6 = skb_header_pointer(skb, 0, sizeof(_ip6h), &_ip6h); |
40 | if (ih6 == NULL) | 46 | if (ih6 == NULL) |
@@ -56,26 +62,34 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par) | |||
56 | return false; | 62 | return false; |
57 | if (FWINV(info->protocol != nexthdr, EBT_IP6_PROTO)) | 63 | if (FWINV(info->protocol != nexthdr, EBT_IP6_PROTO)) |
58 | return false; | 64 | return false; |
59 | if (!(info->bitmask & EBT_IP6_DPORT) && | 65 | if (!(info->bitmask & ( EBT_IP6_DPORT | |
60 | !(info->bitmask & EBT_IP6_SPORT)) | 66 | EBT_IP6_SPORT | EBT_IP6_ICMP6))) |
61 | return true; | 67 | return true; |
62 | pptr = skb_header_pointer(skb, offset_ph, sizeof(_ports), | 68 | |
63 | &_ports); | 69 | /* min icmpv6 headersize is 4, so sizeof(_pkthdr) is ok. */ |
70 | pptr = skb_header_pointer(skb, offset_ph, sizeof(_pkthdr), | ||
71 | &_pkthdr); | ||
64 | if (pptr == NULL) | 72 | if (pptr == NULL) |
65 | return false; | 73 | return false; |
66 | if (info->bitmask & EBT_IP6_DPORT) { | 74 | if (info->bitmask & EBT_IP6_DPORT) { |
67 | u32 dst = ntohs(pptr->dst); | 75 | u16 dst = ntohs(pptr->tcpudphdr.dst); |
68 | if (FWINV(dst < info->dport[0] || | 76 | if (FWINV(dst < info->dport[0] || |
69 | dst > info->dport[1], EBT_IP6_DPORT)) | 77 | dst > info->dport[1], EBT_IP6_DPORT)) |
70 | return false; | 78 | return false; |
71 | } | 79 | } |
72 | if (info->bitmask & EBT_IP6_SPORT) { | 80 | if (info->bitmask & EBT_IP6_SPORT) { |
73 | u32 src = ntohs(pptr->src); | 81 | u16 src = ntohs(pptr->tcpudphdr.src); |
74 | if (FWINV(src < info->sport[0] || | 82 | if (FWINV(src < info->sport[0] || |
75 | src > info->sport[1], EBT_IP6_SPORT)) | 83 | src > info->sport[1], EBT_IP6_SPORT)) |
76 | return false; | 84 | return false; |
77 | } | 85 | } |
78 | return true; | 86 | if ((info->bitmask & EBT_IP6_ICMP6) && |
87 | FWINV(pptr->icmphdr.type < info->icmpv6_type[0] || | ||
88 | pptr->icmphdr.type > info->icmpv6_type[1] || | ||
89 | pptr->icmphdr.code < info->icmpv6_code[0] || | ||
90 | pptr->icmphdr.code > info->icmpv6_code[1], | ||
91 | EBT_IP6_ICMP6)) | ||
92 | return false; | ||
79 | } | 93 | } |
80 | return true; | 94 | return true; |
81 | } | 95 | } |
@@ -103,6 +117,14 @@ static int ebt_ip6_mt_check(const struct xt_mtchk_param *par) | |||
103 | return -EINVAL; | 117 | return -EINVAL; |
104 | if (info->bitmask & EBT_IP6_SPORT && info->sport[0] > info->sport[1]) | 118 | if (info->bitmask & EBT_IP6_SPORT && info->sport[0] > info->sport[1]) |
105 | return -EINVAL; | 119 | return -EINVAL; |
120 | if (info->bitmask & EBT_IP6_ICMP6) { | ||
121 | if ((info->invflags & EBT_IP6_PROTO) || | ||
122 | info->protocol != IPPROTO_ICMPV6) | ||
123 | return -EINVAL; | ||
124 | if (info->icmpv6_type[0] > info->icmpv6_type[1] || | ||
125 | info->icmpv6_code[0] > info->icmpv6_code[1]) | ||
126 | return -EINVAL; | ||
127 | } | ||
106 | return 0; | 128 | return 0; |
107 | } | 129 | } |
108 | 130 | ||
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 16df0532d4b9..5f1825df9dca 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c | |||
@@ -1764,6 +1764,7 @@ static int compat_table_info(const struct ebt_table_info *info, | |||
1764 | 1764 | ||
1765 | newinfo->entries_size = size; | 1765 | newinfo->entries_size = size; |
1766 | 1766 | ||
1767 | xt_compat_init_offsets(AF_INET, info->nentries); | ||
1767 | return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info, | 1768 | return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info, |
1768 | entries, newinfo); | 1769 | entries, newinfo); |
1769 | } | 1770 | } |
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c index c665de778b60..f1f98d967d8a 100644 --- a/net/caif/cfcnfg.c +++ b/net/caif/cfcnfg.c | |||
@@ -23,10 +23,8 @@ | |||
23 | #include <asm/atomic.h> | 23 | #include <asm/atomic.h> |
24 | 24 | ||
25 | #define MAX_PHY_LAYERS 7 | 25 | #define MAX_PHY_LAYERS 7 |
26 | #define PHY_NAME_LEN 20 | ||
27 | 26 | ||
28 | #define container_obj(layr) container_of(layr, struct cfcnfg, layer) | 27 | #define container_obj(layr) container_of(layr, struct cfcnfg, layer) |
29 | #define RFM_FRAGMENT_SIZE 4030 | ||
30 | 28 | ||
31 | /* Information about CAIF physical interfaces held by Config Module in order | 29 | /* Information about CAIF physical interfaces held by Config Module in order |
32 | * to manage physical interfaces | 30 | * to manage physical interfaces |
diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c index d3ed264ad6c4..27dab26ad3b8 100644 --- a/net/caif/cfdgml.c +++ b/net/caif/cfdgml.c | |||
@@ -18,7 +18,6 @@ | |||
18 | #define DGM_CMD_BIT 0x80 | 18 | #define DGM_CMD_BIT 0x80 |
19 | #define DGM_FLOW_OFF 0x81 | 19 | #define DGM_FLOW_OFF 0x81 |
20 | #define DGM_FLOW_ON 0x80 | 20 | #define DGM_FLOW_ON 0x80 |
21 | #define DGM_CTRL_PKT_SIZE 1 | ||
22 | #define DGM_MTU 1500 | 21 | #define DGM_MTU 1500 |
23 | 22 | ||
24 | static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt); | 23 | static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt); |
diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c index 9297f7dea9d8..8303fe3ebf89 100644 --- a/net/caif/cfserl.c +++ b/net/caif/cfserl.c | |||
@@ -25,7 +25,6 @@ struct cfserl { | |||
25 | spinlock_t sync; | 25 | spinlock_t sync; |
26 | bool usestx; | 26 | bool usestx; |
27 | }; | 27 | }; |
28 | #define STXLEN(layr) (layr->usestx ? 1 : 0) | ||
29 | 28 | ||
30 | static int cfserl_receive(struct cflayer *layr, struct cfpkt *pkt); | 29 | static int cfserl_receive(struct cflayer *layr, struct cfpkt *pkt); |
31 | static int cfserl_transmit(struct cflayer *layr, struct cfpkt *pkt); | 30 | static int cfserl_transmit(struct cflayer *layr, struct cfpkt *pkt); |
diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c index efad410e4c82..315c0d601368 100644 --- a/net/caif/cfutill.c +++ b/net/caif/cfutill.c | |||
@@ -20,7 +20,7 @@ | |||
20 | #define UTIL_REMOTE_SHUTDOWN 0x82 | 20 | #define UTIL_REMOTE_SHUTDOWN 0x82 |
21 | #define UTIL_FLOW_OFF 0x81 | 21 | #define UTIL_FLOW_OFF 0x81 |
22 | #define UTIL_FLOW_ON 0x80 | 22 | #define UTIL_FLOW_ON 0x80 |
23 | #define UTIL_CTRL_PKT_SIZE 1 | 23 | |
24 | static int cfutill_receive(struct cflayer *layr, struct cfpkt *pkt); | 24 | static int cfutill_receive(struct cflayer *layr, struct cfpkt *pkt); |
25 | static int cfutill_transmit(struct cflayer *layr, struct cfpkt *pkt); | 25 | static int cfutill_transmit(struct cflayer *layr, struct cfpkt *pkt); |
26 | 26 | ||
diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c index 3b425b189a99..c3b1dec4acf6 100644 --- a/net/caif/cfveil.c +++ b/net/caif/cfveil.c | |||
@@ -17,7 +17,7 @@ | |||
17 | #define VEI_FLOW_OFF 0x81 | 17 | #define VEI_FLOW_OFF 0x81 |
18 | #define VEI_FLOW_ON 0x80 | 18 | #define VEI_FLOW_ON 0x80 |
19 | #define VEI_SET_PIN 0x82 | 19 | #define VEI_SET_PIN 0x82 |
20 | #define VEI_CTRL_PKT_SIZE 1 | 20 | |
21 | #define container_obj(layr) container_of(layr, struct cfsrvl, layer) | 21 | #define container_obj(layr) container_of(layr, struct cfsrvl, layer) |
22 | 22 | ||
23 | static int cfvei_receive(struct cflayer *layr, struct cfpkt *pkt); | 23 | static int cfvei_receive(struct cflayer *layr, struct cfpkt *pkt); |
diff --git a/net/core/dev.c b/net/core/dev.c index 24ea2d71e7ea..d162ba8d622d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c | |||
@@ -1286,7 +1286,7 @@ static int __dev_close(struct net_device *dev) | |||
1286 | return __dev_close_many(&single); | 1286 | return __dev_close_many(&single); |
1287 | } | 1287 | } |
1288 | 1288 | ||
1289 | int dev_close_many(struct list_head *head) | 1289 | static int dev_close_many(struct list_head *head) |
1290 | { | 1290 | { |
1291 | struct net_device *dev, *tmp; | 1291 | struct net_device *dev, *tmp; |
1292 | LIST_HEAD(tmp_list); | 1292 | LIST_HEAD(tmp_list); |
@@ -1594,6 +1594,48 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) | |||
1594 | rcu_read_unlock(); | 1594 | rcu_read_unlock(); |
1595 | } | 1595 | } |
1596 | 1596 | ||
1597 | /* netif_setup_tc - Handle tc mappings on real_num_tx_queues change | ||
1598 | * @dev: Network device | ||
1599 | * @txq: number of queues available | ||
1600 | * | ||
1601 | * If real_num_tx_queues is changed the tc mappings may no longer be | ||
1602 | * valid. To resolve this verify the tc mapping remains valid and if | ||
1603 | * not NULL the mapping. With no priorities mapping to this | ||
1604 | * offset/count pair it will no longer be used. In the worst case TC0 | ||
1605 | * is invalid nothing can be done so disable priority mappings. If is | ||
1606 | * expected that drivers will fix this mapping if they can before | ||
1607 | * calling netif_set_real_num_tx_queues. | ||
1608 | */ | ||
1609 | static void netif_setup_tc(struct net_device *dev, unsigned int txq) | ||
1610 | { | ||
1611 | int i; | ||
1612 | struct netdev_tc_txq *tc = &dev->tc_to_txq[0]; | ||
1613 | |||
1614 | /* If TC0 is invalidated disable TC mapping */ | ||
1615 | if (tc->offset + tc->count > txq) { | ||
1616 | pr_warning("Number of in use tx queues changed " | ||
1617 | "invalidating tc mappings. Priority " | ||
1618 | "traffic classification disabled!\n"); | ||
1619 | dev->num_tc = 0; | ||
1620 | return; | ||
1621 | } | ||
1622 | |||
1623 | /* Invalidated prio to tc mappings set to TC0 */ | ||
1624 | for (i = 1; i < TC_BITMASK + 1; i++) { | ||
1625 | int q = netdev_get_prio_tc_map(dev, i); | ||
1626 | |||
1627 | tc = &dev->tc_to_txq[q]; | ||
1628 | if (tc->offset + tc->count > txq) { | ||
1629 | pr_warning("Number of in use tx queues " | ||
1630 | "changed. Priority %i to tc " | ||
1631 | "mapping %i is no longer valid " | ||
1632 | "setting map to 0\n", | ||
1633 | i, q); | ||
1634 | netdev_set_prio_tc_map(dev, i, 0); | ||
1635 | } | ||
1636 | } | ||
1637 | } | ||
1638 | |||
1597 | /* | 1639 | /* |
1598 | * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues | 1640 | * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues |
1599 | * greater then real_num_tx_queues stale skbs on the qdisc must be flushed. | 1641 | * greater then real_num_tx_queues stale skbs on the qdisc must be flushed. |
@@ -1613,6 +1655,9 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) | |||
1613 | if (rc) | 1655 | if (rc) |
1614 | return rc; | 1656 | return rc; |
1615 | 1657 | ||
1658 | if (dev->num_tc) | ||
1659 | netif_setup_tc(dev, txq); | ||
1660 | |||
1616 | if (txq < dev->real_num_tx_queues) | 1661 | if (txq < dev->real_num_tx_queues) |
1617 | qdisc_reset_all_tx_gt(dev, txq); | 1662 | qdisc_reset_all_tx_gt(dev, txq); |
1618 | } | 1663 | } |
@@ -2162,6 +2207,8 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, | |||
2162 | unsigned int num_tx_queues) | 2207 | unsigned int num_tx_queues) |
2163 | { | 2208 | { |
2164 | u32 hash; | 2209 | u32 hash; |
2210 | u16 qoffset = 0; | ||
2211 | u16 qcount = num_tx_queues; | ||
2165 | 2212 | ||
2166 | if (skb_rx_queue_recorded(skb)) { | 2213 | if (skb_rx_queue_recorded(skb)) { |
2167 | hash = skb_get_rx_queue(skb); | 2214 | hash = skb_get_rx_queue(skb); |
@@ -2170,13 +2217,19 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, | |||
2170 | return hash; | 2217 | return hash; |
2171 | } | 2218 | } |
2172 | 2219 | ||
2220 | if (dev->num_tc) { | ||
2221 | u8 tc = netdev_get_prio_tc_map(dev, skb->priority); | ||
2222 | qoffset = dev->tc_to_txq[tc].offset; | ||
2223 | qcount = dev->tc_to_txq[tc].count; | ||
2224 | } | ||
2225 | |||
2173 | if (skb->sk && skb->sk->sk_hash) | 2226 | if (skb->sk && skb->sk->sk_hash) |
2174 | hash = skb->sk->sk_hash; | 2227 | hash = skb->sk->sk_hash; |
2175 | else | 2228 | else |
2176 | hash = (__force u16) skb->protocol ^ skb->rxhash; | 2229 | hash = (__force u16) skb->protocol ^ skb->rxhash; |
2177 | hash = jhash_1word(hash, hashrnd); | 2230 | hash = jhash_1word(hash, hashrnd); |
2178 | 2231 | ||
2179 | return (u16) (((u64) hash * num_tx_queues) >> 32); | 2232 | return (u16) (((u64) hash * qcount) >> 32) + qoffset; |
2180 | } | 2233 | } |
2181 | EXPORT_SYMBOL(__skb_tx_hash); | 2234 | EXPORT_SYMBOL(__skb_tx_hash); |
2182 | 2235 | ||
@@ -2273,15 +2326,18 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, | |||
2273 | struct netdev_queue *txq) | 2326 | struct netdev_queue *txq) |
2274 | { | 2327 | { |
2275 | spinlock_t *root_lock = qdisc_lock(q); | 2328 | spinlock_t *root_lock = qdisc_lock(q); |
2276 | bool contended = qdisc_is_running(q); | 2329 | bool contended; |
2277 | int rc; | 2330 | int rc; |
2278 | 2331 | ||
2332 | qdisc_skb_cb(skb)->pkt_len = skb->len; | ||
2333 | qdisc_calculate_pkt_len(skb, q); | ||
2279 | /* | 2334 | /* |
2280 | * Heuristic to force contended enqueues to serialize on a | 2335 | * Heuristic to force contended enqueues to serialize on a |
2281 | * separate lock before trying to get qdisc main lock. | 2336 | * separate lock before trying to get qdisc main lock. |
2282 | * This permits __QDISC_STATE_RUNNING owner to get the lock more often | 2337 | * This permits __QDISC_STATE_RUNNING owner to get the lock more often |
2283 | * and dequeue packets faster. | 2338 | * and dequeue packets faster. |
2284 | */ | 2339 | */ |
2340 | contended = qdisc_is_running(q); | ||
2285 | if (unlikely(contended)) | 2341 | if (unlikely(contended)) |
2286 | spin_lock(&q->busylock); | 2342 | spin_lock(&q->busylock); |
2287 | 2343 | ||
@@ -2299,7 +2355,6 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, | |||
2299 | if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE)) | 2355 | if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE)) |
2300 | skb_dst_force(skb); | 2356 | skb_dst_force(skb); |
2301 | 2357 | ||
2302 | qdisc_skb_cb(skb)->pkt_len = skb->len; | ||
2303 | qdisc_bstats_update(q, skb); | 2358 | qdisc_bstats_update(q, skb); |
2304 | 2359 | ||
2305 | if (sch_direct_xmit(skb, q, dev, txq, root_lock)) { | 2360 | if (sch_direct_xmit(skb, q, dev, txq, root_lock)) { |
@@ -2314,7 +2369,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, | |||
2314 | rc = NET_XMIT_SUCCESS; | 2369 | rc = NET_XMIT_SUCCESS; |
2315 | } else { | 2370 | } else { |
2316 | skb_dst_force(skb); | 2371 | skb_dst_force(skb); |
2317 | rc = qdisc_enqueue_root(skb, q); | 2372 | rc = q->enqueue(skb, q) & NET_XMIT_MASK; |
2318 | if (qdisc_run_begin(q)) { | 2373 | if (qdisc_run_begin(q)) { |
2319 | if (unlikely(contended)) { | 2374 | if (unlikely(contended)) { |
2320 | spin_unlock(&q->busylock); | 2375 | spin_unlock(&q->busylock); |
@@ -4573,6 +4628,17 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) | |||
4573 | EXPORT_SYMBOL(dev_set_mtu); | 4628 | EXPORT_SYMBOL(dev_set_mtu); |
4574 | 4629 | ||
4575 | /** | 4630 | /** |
4631 | * dev_set_group - Change group this device belongs to | ||
4632 | * @dev: device | ||
4633 | * @new_group: group this device should belong to | ||
4634 | */ | ||
4635 | void dev_set_group(struct net_device *dev, int new_group) | ||
4636 | { | ||
4637 | dev->group = new_group; | ||
4638 | } | ||
4639 | EXPORT_SYMBOL(dev_set_group); | ||
4640 | |||
4641 | /** | ||
4576 | * dev_set_mac_address - Change Media Access Control Address | 4642 | * dev_set_mac_address - Change Media Access Control Address |
4577 | * @dev: device | 4643 | * @dev: device |
4578 | * @sa: new address | 4644 | * @sa: new address |
@@ -5679,6 +5745,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, | |||
5679 | dev->priv_flags = IFF_XMIT_DST_RELEASE; | 5745 | dev->priv_flags = IFF_XMIT_DST_RELEASE; |
5680 | setup(dev); | 5746 | setup(dev); |
5681 | strcpy(dev->name, name); | 5747 | strcpy(dev->name, name); |
5748 | dev->group = INIT_NETDEV_GROUP; | ||
5682 | return dev; | 5749 | return dev; |
5683 | 5750 | ||
5684 | free_pcpu: | 5751 | free_pcpu: |
diff --git a/net/core/filter.c b/net/core/filter.c index afc58374ca96..232b1873bb28 100644 --- a/net/core/filter.c +++ b/net/core/filter.c | |||
@@ -142,14 +142,14 @@ int sk_filter(struct sock *sk, struct sk_buff *skb) | |||
142 | if (err) | 142 | if (err) |
143 | return err; | 143 | return err; |
144 | 144 | ||
145 | rcu_read_lock_bh(); | 145 | rcu_read_lock(); |
146 | filter = rcu_dereference_bh(sk->sk_filter); | 146 | filter = rcu_dereference(sk->sk_filter); |
147 | if (filter) { | 147 | if (filter) { |
148 | unsigned int pkt_len = sk_run_filter(skb, filter->insns); | 148 | unsigned int pkt_len = sk_run_filter(skb, filter->insns); |
149 | 149 | ||
150 | err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; | 150 | err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; |
151 | } | 151 | } |
152 | rcu_read_unlock_bh(); | 152 | rcu_read_unlock(); |
153 | 153 | ||
154 | return err; | 154 | return err; |
155 | } | 155 | } |
diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 60a902913429..799f06e03a22 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c | |||
@@ -316,7 +316,7 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries) | |||
316 | { | 316 | { |
317 | size_t size = entries * sizeof(struct neighbour *); | 317 | size_t size = entries * sizeof(struct neighbour *); |
318 | struct neigh_hash_table *ret; | 318 | struct neigh_hash_table *ret; |
319 | struct neighbour **buckets; | 319 | struct neighbour __rcu **buckets; |
320 | 320 | ||
321 | ret = kmalloc(sizeof(*ret), GFP_ATOMIC); | 321 | ret = kmalloc(sizeof(*ret), GFP_ATOMIC); |
322 | if (!ret) | 322 | if (!ret) |
@@ -324,14 +324,14 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries) | |||
324 | if (size <= PAGE_SIZE) | 324 | if (size <= PAGE_SIZE) |
325 | buckets = kzalloc(size, GFP_ATOMIC); | 325 | buckets = kzalloc(size, GFP_ATOMIC); |
326 | else | 326 | else |
327 | buckets = (struct neighbour **) | 327 | buckets = (struct neighbour __rcu **) |
328 | __get_free_pages(GFP_ATOMIC | __GFP_ZERO, | 328 | __get_free_pages(GFP_ATOMIC | __GFP_ZERO, |
329 | get_order(size)); | 329 | get_order(size)); |
330 | if (!buckets) { | 330 | if (!buckets) { |
331 | kfree(ret); | 331 | kfree(ret); |
332 | return NULL; | 332 | return NULL; |
333 | } | 333 | } |
334 | rcu_assign_pointer(ret->hash_buckets, buckets); | 334 | ret->hash_buckets = buckets; |
335 | ret->hash_mask = entries - 1; | 335 | ret->hash_mask = entries - 1; |
336 | get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd)); | 336 | get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd)); |
337 | return ret; | 337 | return ret; |
@@ -343,7 +343,7 @@ static void neigh_hash_free_rcu(struct rcu_head *head) | |||
343 | struct neigh_hash_table, | 343 | struct neigh_hash_table, |
344 | rcu); | 344 | rcu); |
345 | size_t size = (nht->hash_mask + 1) * sizeof(struct neighbour *); | 345 | size_t size = (nht->hash_mask + 1) * sizeof(struct neighbour *); |
346 | struct neighbour **buckets = nht->hash_buckets; | 346 | struct neighbour __rcu **buckets = nht->hash_buckets; |
347 | 347 | ||
348 | if (size <= PAGE_SIZE) | 348 | if (size <= PAGE_SIZE) |
349 | kfree(buckets); | 349 | kfree(buckets); |
@@ -1540,7 +1540,7 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl) | |||
1540 | panic("cannot create neighbour proc dir entry"); | 1540 | panic("cannot create neighbour proc dir entry"); |
1541 | #endif | 1541 | #endif |
1542 | 1542 | ||
1543 | tbl->nht = neigh_hash_alloc(8); | 1543 | RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(8)); |
1544 | 1544 | ||
1545 | phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *); | 1545 | phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *); |
1546 | tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL); | 1546 | tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL); |
@@ -1602,7 +1602,8 @@ int neigh_table_clear(struct neigh_table *tbl) | |||
1602 | } | 1602 | } |
1603 | write_unlock(&neigh_tbl_lock); | 1603 | write_unlock(&neigh_tbl_lock); |
1604 | 1604 | ||
1605 | call_rcu(&tbl->nht->rcu, neigh_hash_free_rcu); | 1605 | call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu, |
1606 | neigh_hash_free_rcu); | ||
1606 | tbl->nht = NULL; | 1607 | tbl->nht = NULL; |
1607 | 1608 | ||
1608 | kfree(tbl->phash_buckets); | 1609 | kfree(tbl->phash_buckets); |
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 750db57f3bb3..c668f8c371b2 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c | |||
@@ -868,6 +868,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, | |||
868 | netif_running(dev) ? dev->operstate : IF_OPER_DOWN); | 868 | netif_running(dev) ? dev->operstate : IF_OPER_DOWN); |
869 | NLA_PUT_U8(skb, IFLA_LINKMODE, dev->link_mode); | 869 | NLA_PUT_U8(skb, IFLA_LINKMODE, dev->link_mode); |
870 | NLA_PUT_U32(skb, IFLA_MTU, dev->mtu); | 870 | NLA_PUT_U32(skb, IFLA_MTU, dev->mtu); |
871 | NLA_PUT_U32(skb, IFLA_GROUP, dev->group); | ||
871 | 872 | ||
872 | if (dev->ifindex != dev->iflink) | 873 | if (dev->ifindex != dev->iflink) |
873 | NLA_PUT_U32(skb, IFLA_LINK, dev->iflink); | 874 | NLA_PUT_U32(skb, IFLA_LINK, dev->iflink); |
@@ -1265,6 +1266,11 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, | |||
1265 | modified = 1; | 1266 | modified = 1; |
1266 | } | 1267 | } |
1267 | 1268 | ||
1269 | if (tb[IFLA_GROUP]) { | ||
1270 | dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP])); | ||
1271 | modified = 1; | ||
1272 | } | ||
1273 | |||
1268 | /* | 1274 | /* |
1269 | * Interface selected by interface index but interface | 1275 | * Interface selected by interface index but interface |
1270 | * name provided implies that a name change has been | 1276 | * name provided implies that a name change has been |
@@ -1542,6 +1548,8 @@ struct net_device *rtnl_create_link(struct net *src_net, struct net *net, | |||
1542 | set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); | 1548 | set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); |
1543 | if (tb[IFLA_LINKMODE]) | 1549 | if (tb[IFLA_LINKMODE]) |
1544 | dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); | 1550 | dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); |
1551 | if (tb[IFLA_GROUP]) | ||
1552 | dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP])); | ||
1545 | 1553 | ||
1546 | return dev; | 1554 | return dev; |
1547 | 1555 | ||
@@ -1552,6 +1560,24 @@ err: | |||
1552 | } | 1560 | } |
1553 | EXPORT_SYMBOL(rtnl_create_link); | 1561 | EXPORT_SYMBOL(rtnl_create_link); |
1554 | 1562 | ||
1563 | static int rtnl_group_changelink(struct net *net, int group, | ||
1564 | struct ifinfomsg *ifm, | ||
1565 | struct nlattr **tb) | ||
1566 | { | ||
1567 | struct net_device *dev; | ||
1568 | int err; | ||
1569 | |||
1570 | for_each_netdev(net, dev) { | ||
1571 | if (dev->group == group) { | ||
1572 | err = do_setlink(dev, ifm, tb, NULL, 0); | ||
1573 | if (err < 0) | ||
1574 | return err; | ||
1575 | } | ||
1576 | } | ||
1577 | |||
1578 | return 0; | ||
1579 | } | ||
1580 | |||
1555 | static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) | 1581 | static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) |
1556 | { | 1582 | { |
1557 | struct net *net = sock_net(skb->sk); | 1583 | struct net *net = sock_net(skb->sk); |
@@ -1579,10 +1605,12 @@ replay: | |||
1579 | ifm = nlmsg_data(nlh); | 1605 | ifm = nlmsg_data(nlh); |
1580 | if (ifm->ifi_index > 0) | 1606 | if (ifm->ifi_index > 0) |
1581 | dev = __dev_get_by_index(net, ifm->ifi_index); | 1607 | dev = __dev_get_by_index(net, ifm->ifi_index); |
1582 | else if (ifname[0]) | 1608 | else { |
1583 | dev = __dev_get_by_name(net, ifname); | 1609 | if (ifname[0]) |
1584 | else | 1610 | dev = __dev_get_by_name(net, ifname); |
1585 | dev = NULL; | 1611 | else |
1612 | dev = NULL; | ||
1613 | } | ||
1586 | 1614 | ||
1587 | err = validate_linkmsg(dev, tb); | 1615 | err = validate_linkmsg(dev, tb); |
1588 | if (err < 0) | 1616 | if (err < 0) |
@@ -1646,8 +1674,13 @@ replay: | |||
1646 | return do_setlink(dev, ifm, tb, ifname, modified); | 1674 | return do_setlink(dev, ifm, tb, ifname, modified); |
1647 | } | 1675 | } |
1648 | 1676 | ||
1649 | if (!(nlh->nlmsg_flags & NLM_F_CREATE)) | 1677 | if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { |
1678 | if (ifm->ifi_index == 0 && tb[IFLA_GROUP]) | ||
1679 | return rtnl_group_changelink(net, | ||
1680 | nla_get_u32(tb[IFLA_GROUP]), | ||
1681 | ifm, tb); | ||
1650 | return -ENODEV; | 1682 | return -ENODEV; |
1683 | } | ||
1651 | 1684 | ||
1652 | if (ifm->ifi_index) | 1685 | if (ifm->ifi_index) |
1653 | return -EOPNOTSUPP; | 1686 | return -EOPNOTSUPP; |
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index f2abd3755690..b66600b3f4b5 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c | |||
@@ -59,7 +59,6 @@ struct dn_hash | |||
59 | }; | 59 | }; |
60 | 60 | ||
61 | #define dz_key_0(key) ((key).datum = 0) | 61 | #define dz_key_0(key) ((key).datum = 0) |
62 | #define dz_prefix(key,dz) ((key).datum) | ||
63 | 62 | ||
64 | #define for_nexthops(fi) { int nhsel; const struct dn_fib_nh *nh;\ | 63 | #define for_nexthops(fi) { int nhsel; const struct dn_fib_nh *nh;\ |
65 | for(nhsel = 0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++) | 64 | for(nhsel = 0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++) |
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index a5a1050595d1..8949a05ac307 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig | |||
@@ -140,6 +140,9 @@ config IP_ROUTE_VERBOSE | |||
140 | handled by the klogd daemon which is responsible for kernel messages | 140 | handled by the klogd daemon which is responsible for kernel messages |
141 | ("man klogd"). | 141 | ("man klogd"). |
142 | 142 | ||
143 | config IP_ROUTE_CLASSID | ||
144 | bool | ||
145 | |||
143 | config IP_PNP | 146 | config IP_PNP |
144 | bool "IP: kernel level autoconfiguration" | 147 | bool "IP: kernel level autoconfiguration" |
145 | help | 148 | help |
@@ -657,4 +660,3 @@ config TCP_MD5SIG | |||
657 | on the Internet. | 660 | on the Internet. |
658 | 661 | ||
659 | If unsure, say N. | 662 | If unsure, say N. |
660 | |||
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 7981a24f5c7b..9cefe72029cf 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c | |||
@@ -41,12 +41,12 @@ struct fib4_rule { | |||
41 | __be32 srcmask; | 41 | __be32 srcmask; |
42 | __be32 dst; | 42 | __be32 dst; |
43 | __be32 dstmask; | 43 | __be32 dstmask; |
44 | #ifdef CONFIG_NET_CLS_ROUTE | 44 | #ifdef CONFIG_IP_ROUTE_CLASSID |
45 | u32 tclassid; | 45 | u32 tclassid; |
46 | #endif | 46 | #endif |
47 | }; | 47 | }; |
48 | 48 | ||
49 | #ifdef CONFIG_NET_CLS_ROUTE | 49 | #ifdef CONFIG_IP_ROUTE_CLASSID |
50 | u32 fib_rules_tclass(struct fib_result *res) | 50 | u32 fib_rules_tclass(struct fib_result *res) |
51 | { | 51 | { |
52 | return res->r ? ((struct fib4_rule *) res->r)->tclassid : 0; | 52 | return res->r ? ((struct fib4_rule *) res->r)->tclassid : 0; |
@@ -165,7 +165,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, | |||
165 | if (frh->dst_len) | 165 | if (frh->dst_len) |
166 | rule4->dst = nla_get_be32(tb[FRA_DST]); | 166 | rule4->dst = nla_get_be32(tb[FRA_DST]); |
167 | 167 | ||
168 | #ifdef CONFIG_NET_CLS_ROUTE | 168 | #ifdef CONFIG_IP_ROUTE_CLASSID |
169 | if (tb[FRA_FLOW]) | 169 | if (tb[FRA_FLOW]) |
170 | rule4->tclassid = nla_get_u32(tb[FRA_FLOW]); | 170 | rule4->tclassid = nla_get_u32(tb[FRA_FLOW]); |
171 | #endif | 171 | #endif |
@@ -195,7 +195,7 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, | |||
195 | if (frh->tos && (rule4->tos != frh->tos)) | 195 | if (frh->tos && (rule4->tos != frh->tos)) |
196 | return 0; | 196 | return 0; |
197 | 197 | ||
198 | #ifdef CONFIG_NET_CLS_ROUTE | 198 | #ifdef CONFIG_IP_ROUTE_CLASSID |
199 | if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW]))) | 199 | if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW]))) |
200 | return 0; | 200 | return 0; |
201 | #endif | 201 | #endif |
@@ -224,7 +224,7 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb, | |||
224 | if (rule4->src_len) | 224 | if (rule4->src_len) |
225 | NLA_PUT_BE32(skb, FRA_SRC, rule4->src); | 225 | NLA_PUT_BE32(skb, FRA_SRC, rule4->src); |
226 | 226 | ||
227 | #ifdef CONFIG_NET_CLS_ROUTE | 227 | #ifdef CONFIG_IP_ROUTE_CLASSID |
228 | if (rule4->tclassid) | 228 | if (rule4->tclassid) |
229 | NLA_PUT_U32(skb, FRA_FLOW, rule4->tclassid); | 229 | NLA_PUT_U32(skb, FRA_FLOW, rule4->tclassid); |
230 | #endif | 230 | #endif |
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 12d3dc3df1b7..9aff11d7278f 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c | |||
@@ -200,7 +200,7 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) | |||
200 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 200 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
201 | nh->nh_weight != onh->nh_weight || | 201 | nh->nh_weight != onh->nh_weight || |
202 | #endif | 202 | #endif |
203 | #ifdef CONFIG_NET_CLS_ROUTE | 203 | #ifdef CONFIG_IP_ROUTE_CLASSID |
204 | nh->nh_tclassid != onh->nh_tclassid || | 204 | nh->nh_tclassid != onh->nh_tclassid || |
205 | #endif | 205 | #endif |
206 | ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD)) | 206 | ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD)) |
@@ -422,7 +422,7 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, | |||
422 | 422 | ||
423 | nla = nla_find(attrs, attrlen, RTA_GATEWAY); | 423 | nla = nla_find(attrs, attrlen, RTA_GATEWAY); |
424 | nexthop_nh->nh_gw = nla ? nla_get_be32(nla) : 0; | 424 | nexthop_nh->nh_gw = nla ? nla_get_be32(nla) : 0; |
425 | #ifdef CONFIG_NET_CLS_ROUTE | 425 | #ifdef CONFIG_IP_ROUTE_CLASSID |
426 | nla = nla_find(attrs, attrlen, RTA_FLOW); | 426 | nla = nla_find(attrs, attrlen, RTA_FLOW); |
427 | nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; | 427 | nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; |
428 | #endif | 428 | #endif |
@@ -476,7 +476,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) | |||
476 | nla = nla_find(attrs, attrlen, RTA_GATEWAY); | 476 | nla = nla_find(attrs, attrlen, RTA_GATEWAY); |
477 | if (nla && nla_get_be32(nla) != nh->nh_gw) | 477 | if (nla && nla_get_be32(nla) != nh->nh_gw) |
478 | return 1; | 478 | return 1; |
479 | #ifdef CONFIG_NET_CLS_ROUTE | 479 | #ifdef CONFIG_IP_ROUTE_CLASSID |
480 | nla = nla_find(attrs, attrlen, RTA_FLOW); | 480 | nla = nla_find(attrs, attrlen, RTA_FLOW); |
481 | if (nla && nla_get_u32(nla) != nh->nh_tclassid) | 481 | if (nla && nla_get_u32(nla) != nh->nh_tclassid) |
482 | return 1; | 482 | return 1; |
@@ -779,7 +779,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) | |||
779 | goto err_inval; | 779 | goto err_inval; |
780 | if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) | 780 | if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) |
781 | goto err_inval; | 781 | goto err_inval; |
782 | #ifdef CONFIG_NET_CLS_ROUTE | 782 | #ifdef CONFIG_IP_ROUTE_CLASSID |
783 | if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) | 783 | if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) |
784 | goto err_inval; | 784 | goto err_inval; |
785 | #endif | 785 | #endif |
@@ -792,7 +792,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) | |||
792 | nh->nh_oif = cfg->fc_oif; | 792 | nh->nh_oif = cfg->fc_oif; |
793 | nh->nh_gw = cfg->fc_gw; | 793 | nh->nh_gw = cfg->fc_gw; |
794 | nh->nh_flags = cfg->fc_flags; | 794 | nh->nh_flags = cfg->fc_flags; |
795 | #ifdef CONFIG_NET_CLS_ROUTE | 795 | #ifdef CONFIG_IP_ROUTE_CLASSID |
796 | nh->nh_tclassid = cfg->fc_flow; | 796 | nh->nh_tclassid = cfg->fc_flow; |
797 | #endif | 797 | #endif |
798 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 798 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
@@ -1002,7 +1002,7 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, | |||
1002 | 1002 | ||
1003 | if (fi->fib_nh->nh_oif) | 1003 | if (fi->fib_nh->nh_oif) |
1004 | NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif); | 1004 | NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif); |
1005 | #ifdef CONFIG_NET_CLS_ROUTE | 1005 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1006 | if (fi->fib_nh[0].nh_tclassid) | 1006 | if (fi->fib_nh[0].nh_tclassid) |
1007 | NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid); | 1007 | NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid); |
1008 | #endif | 1008 | #endif |
@@ -1027,7 +1027,7 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, | |||
1027 | 1027 | ||
1028 | if (nh->nh_gw) | 1028 | if (nh->nh_gw) |
1029 | NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw); | 1029 | NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw); |
1030 | #ifdef CONFIG_NET_CLS_ROUTE | 1030 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1031 | if (nh->nh_tclassid) | 1031 | if (nh->nh_tclassid) |
1032 | NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid); | 1032 | NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid); |
1033 | #endif | 1033 | #endif |
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index d859bcc26cb7..d7b2b0987a3b 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c | |||
@@ -340,7 +340,7 @@ static int ip_rcv_finish(struct sk_buff *skb) | |||
340 | } | 340 | } |
341 | } | 341 | } |
342 | 342 | ||
343 | #ifdef CONFIG_NET_CLS_ROUTE | 343 | #ifdef CONFIG_IP_ROUTE_CLASSID |
344 | if (unlikely(skb_dst(skb)->tclassid)) { | 344 | if (unlikely(skb_dst(skb)->tclassid)) { |
345 | struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct); | 345 | struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct); |
346 | u32 idx = skb_dst(skb)->tclassid; | 346 | u32 idx = skb_dst(skb)->tclassid; |
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index babd1a2bae5f..f926a310075d 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig | |||
@@ -206,8 +206,9 @@ config IP_NF_TARGET_REDIRECT | |||
206 | 206 | ||
207 | config NF_NAT_SNMP_BASIC | 207 | config NF_NAT_SNMP_BASIC |
208 | tristate "Basic SNMP-ALG support" | 208 | tristate "Basic SNMP-ALG support" |
209 | depends on NF_NAT | 209 | depends on NF_CONNTRACK_SNMP && NF_NAT |
210 | depends on NETFILTER_ADVANCED | 210 | depends on NETFILTER_ADVANCED |
211 | default NF_NAT && NF_CONNTRACK_SNMP | ||
211 | ---help--- | 212 | ---help--- |
212 | 213 | ||
213 | This module implements an Application Layer Gateway (ALG) for | 214 | This module implements an Application Layer Gateway (ALG) for |
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index e855fffaed95..e95054c690c6 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c | |||
@@ -866,6 +866,7 @@ static int compat_table_info(const struct xt_table_info *info, | |||
866 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); | 866 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); |
867 | newinfo->initial_entries = 0; | 867 | newinfo->initial_entries = 0; |
868 | loc_cpu_entry = info->entries[raw_smp_processor_id()]; | 868 | loc_cpu_entry = info->entries[raw_smp_processor_id()]; |
869 | xt_compat_init_offsets(NFPROTO_ARP, info->number); | ||
869 | xt_entry_foreach(iter, loc_cpu_entry, info->size) { | 870 | xt_entry_foreach(iter, loc_cpu_entry, info->size) { |
870 | ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); | 871 | ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); |
871 | if (ret != 0) | 872 | if (ret != 0) |
@@ -1333,6 +1334,7 @@ static int translate_compat_table(const char *name, | |||
1333 | duprintf("translate_compat_table: size %u\n", info->size); | 1334 | duprintf("translate_compat_table: size %u\n", info->size); |
1334 | j = 0; | 1335 | j = 0; |
1335 | xt_compat_lock(NFPROTO_ARP); | 1336 | xt_compat_lock(NFPROTO_ARP); |
1337 | xt_compat_init_offsets(NFPROTO_ARP, number); | ||
1336 | /* Walk through entries, checking offsets. */ | 1338 | /* Walk through entries, checking offsets. */ |
1337 | xt_entry_foreach(iter0, entry0, total_size) { | 1339 | xt_entry_foreach(iter0, entry0, total_size) { |
1338 | ret = check_compat_entry_size_and_hooks(iter0, info, &size, | 1340 | ret = check_compat_entry_size_and_hooks(iter0, info, &size, |
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 652efea013dc..ef7d7b9680ea 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c | |||
@@ -1063,6 +1063,7 @@ static int compat_table_info(const struct xt_table_info *info, | |||
1063 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); | 1063 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); |
1064 | newinfo->initial_entries = 0; | 1064 | newinfo->initial_entries = 0; |
1065 | loc_cpu_entry = info->entries[raw_smp_processor_id()]; | 1065 | loc_cpu_entry = info->entries[raw_smp_processor_id()]; |
1066 | xt_compat_init_offsets(AF_INET, info->number); | ||
1066 | xt_entry_foreach(iter, loc_cpu_entry, info->size) { | 1067 | xt_entry_foreach(iter, loc_cpu_entry, info->size) { |
1067 | ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); | 1068 | ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); |
1068 | if (ret != 0) | 1069 | if (ret != 0) |
@@ -1664,6 +1665,7 @@ translate_compat_table(struct net *net, | |||
1664 | duprintf("translate_compat_table: size %u\n", info->size); | 1665 | duprintf("translate_compat_table: size %u\n", info->size); |
1665 | j = 0; | 1666 | j = 0; |
1666 | xt_compat_lock(AF_INET); | 1667 | xt_compat_lock(AF_INET); |
1668 | xt_compat_init_offsets(AF_INET, number); | ||
1667 | /* Walk through entries, checking offsets. */ | 1669 | /* Walk through entries, checking offsets. */ |
1668 | xt_entry_foreach(iter0, entry0, total_size) { | 1670 | xt_entry_foreach(iter0, entry0, total_size) { |
1669 | ret = check_compat_entry_size_and_hooks(iter0, info, &size, | 1671 | ret = check_compat_entry_size_and_hooks(iter0, info, &size, |
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 1e26a4897655..403ca57f6011 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c | |||
@@ -300,13 +300,8 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) | |||
300 | * that the ->target() function isn't called after ->destroy() */ | 300 | * that the ->target() function isn't called after ->destroy() */ |
301 | 301 | ||
302 | ct = nf_ct_get(skb, &ctinfo); | 302 | ct = nf_ct_get(skb, &ctinfo); |
303 | if (ct == NULL) { | 303 | if (ct == NULL) |
304 | pr_info("no conntrack!\n"); | ||
305 | /* FIXME: need to drop invalid ones, since replies | ||
306 | * to outgoing connections of other nodes will be | ||
307 | * marked as INVALID */ | ||
308 | return NF_DROP; | 304 | return NF_DROP; |
309 | } | ||
310 | 305 | ||
311 | /* special case: ICMP error handling. conntrack distinguishes between | 306 | /* special case: ICMP error handling. conntrack distinguishes between |
312 | * error messages (RELATED) and information requests (see below) */ | 307 | * error messages (RELATED) and information requests (see below) */ |
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index 72ffc8fda2e9..d76d6c9ed946 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c | |||
@@ -442,8 +442,7 @@ ipt_log_packet(u_int8_t pf, | |||
442 | } | 442 | } |
443 | #endif | 443 | #endif |
444 | 444 | ||
445 | /* MAC logging for input path only. */ | 445 | if (in != NULL) |
446 | if (in && !out) | ||
447 | dump_mac_header(m, loginfo, skb); | 446 | dump_mac_header(m, loginfo, skb); |
448 | 447 | ||
449 | dump_packet(m, loginfo, skb, 0); | 448 | dump_packet(m, loginfo, skb, 0); |
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 294a2a32f293..aef5d1fbe77d 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c | |||
@@ -60,7 +60,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct net_device *out) | |||
60 | ret = ipt_do_table(skb, NF_INET_LOCAL_OUT, NULL, out, | 60 | ret = ipt_do_table(skb, NF_INET_LOCAL_OUT, NULL, out, |
61 | dev_net(out)->ipv4.iptable_mangle); | 61 | dev_net(out)->ipv4.iptable_mangle); |
62 | /* Reroute for ANY change. */ | 62 | /* Reroute for ANY change. */ |
63 | if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) { | 63 | if (ret != NF_DROP && ret != NF_STOLEN) { |
64 | iph = ip_hdr(skb); | 64 | iph = ip_hdr(skb); |
65 | 65 | ||
66 | if (iph->saddr != saddr || | 66 | if (iph->saddr != saddr || |
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 63f60fc5d26a..5585980fce2e 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <net/netfilter/nf_conntrack_l4proto.h> | 20 | #include <net/netfilter/nf_conntrack_l4proto.h> |
21 | #include <net/netfilter/nf_conntrack_expect.h> | 21 | #include <net/netfilter/nf_conntrack_expect.h> |
22 | #include <net/netfilter/nf_conntrack_acct.h> | 22 | #include <net/netfilter/nf_conntrack_acct.h> |
23 | #include <linux/rculist_nulls.h> | ||
23 | 24 | ||
24 | struct ct_iter_state { | 25 | struct ct_iter_state { |
25 | struct seq_net_private p; | 26 | struct seq_net_private p; |
@@ -35,7 +36,8 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) | |||
35 | for (st->bucket = 0; | 36 | for (st->bucket = 0; |
36 | st->bucket < net->ct.htable_size; | 37 | st->bucket < net->ct.htable_size; |
37 | st->bucket++) { | 38 | st->bucket++) { |
38 | n = rcu_dereference(net->ct.hash[st->bucket].first); | 39 | n = rcu_dereference( |
40 | hlist_nulls_first_rcu(&net->ct.hash[st->bucket])); | ||
39 | if (!is_a_nulls(n)) | 41 | if (!is_a_nulls(n)) |
40 | return n; | 42 | return n; |
41 | } | 43 | } |
@@ -48,13 +50,14 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, | |||
48 | struct net *net = seq_file_net(seq); | 50 | struct net *net = seq_file_net(seq); |
49 | struct ct_iter_state *st = seq->private; | 51 | struct ct_iter_state *st = seq->private; |
50 | 52 | ||
51 | head = rcu_dereference(head->next); | 53 | head = rcu_dereference(hlist_nulls_next_rcu(head)); |
52 | while (is_a_nulls(head)) { | 54 | while (is_a_nulls(head)) { |
53 | if (likely(get_nulls_value(head) == st->bucket)) { | 55 | if (likely(get_nulls_value(head) == st->bucket)) { |
54 | if (++st->bucket >= net->ct.htable_size) | 56 | if (++st->bucket >= net->ct.htable_size) |
55 | return NULL; | 57 | return NULL; |
56 | } | 58 | } |
57 | head = rcu_dereference(net->ct.hash[st->bucket].first); | 59 | head = rcu_dereference( |
60 | hlist_nulls_first_rcu(&net->ct.hash[st->bucket])); | ||
58 | } | 61 | } |
59 | return head; | 62 | return head; |
60 | } | 63 | } |
@@ -217,7 +220,8 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq) | |||
217 | struct hlist_node *n; | 220 | struct hlist_node *n; |
218 | 221 | ||
219 | for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { | 222 | for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { |
220 | n = rcu_dereference(net->ct.expect_hash[st->bucket].first); | 223 | n = rcu_dereference( |
224 | hlist_first_rcu(&net->ct.expect_hash[st->bucket])); | ||
221 | if (n) | 225 | if (n) |
222 | return n; | 226 | return n; |
223 | } | 227 | } |
@@ -230,11 +234,12 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq, | |||
230 | struct net *net = seq_file_net(seq); | 234 | struct net *net = seq_file_net(seq); |
231 | struct ct_expect_iter_state *st = seq->private; | 235 | struct ct_expect_iter_state *st = seq->private; |
232 | 236 | ||
233 | head = rcu_dereference(head->next); | 237 | head = rcu_dereference(hlist_next_rcu(head)); |
234 | while (head == NULL) { | 238 | while (head == NULL) { |
235 | if (++st->bucket >= nf_ct_expect_hsize) | 239 | if (++st->bucket >= nf_ct_expect_hsize) |
236 | return NULL; | 240 | return NULL; |
237 | head = rcu_dereference(net->ct.expect_hash[st->bucket].first); | 241 | head = rcu_dereference( |
242 | hlist_first_rcu(&net->ct.expect_hash[st->bucket])); | ||
238 | } | 243 | } |
239 | return head; | 244 | return head; |
240 | } | 245 | } |
diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c index 0f23b3f06df0..703f366fd235 100644 --- a/net/ipv4/netfilter/nf_nat_amanda.c +++ b/net/ipv4/netfilter/nf_nat_amanda.c | |||
@@ -44,13 +44,13 @@ static unsigned int help(struct sk_buff *skb, | |||
44 | 44 | ||
45 | /* Try to get same port: if not, try to change it. */ | 45 | /* Try to get same port: if not, try to change it. */ |
46 | for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { | 46 | for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { |
47 | int ret; | 47 | int res; |
48 | 48 | ||
49 | exp->tuple.dst.u.tcp.port = htons(port); | 49 | exp->tuple.dst.u.tcp.port = htons(port); |
50 | ret = nf_ct_expect_related(exp); | 50 | res = nf_ct_expect_related(exp); |
51 | if (ret == 0) | 51 | if (res == 0) |
52 | break; | 52 | break; |
53 | else if (ret != -EBUSY) { | 53 | else if (res != -EBUSY) { |
54 | port = 0; | 54 | port = 0; |
55 | break; | 55 | break; |
56 | } | 56 | } |
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index c04787ce1a71..21bcf471b25a 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c | |||
@@ -221,7 +221,14 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, | |||
221 | manips not an issue. */ | 221 | manips not an issue. */ |
222 | if (maniptype == IP_NAT_MANIP_SRC && | 222 | if (maniptype == IP_NAT_MANIP_SRC && |
223 | !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) { | 223 | !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) { |
224 | if (find_appropriate_src(net, zone, orig_tuple, tuple, range)) { | 224 | /* try the original tuple first */ |
225 | if (in_range(orig_tuple, range)) { | ||
226 | if (!nf_nat_used_tuple(orig_tuple, ct)) { | ||
227 | *tuple = *orig_tuple; | ||
228 | return; | ||
229 | } | ||
230 | } else if (find_appropriate_src(net, zone, orig_tuple, tuple, | ||
231 | range)) { | ||
225 | pr_debug("get_unique_tuple: Found current src map\n"); | 232 | pr_debug("get_unique_tuple: Found current src map\n"); |
226 | if (!nf_nat_used_tuple(tuple, ct)) | 233 | if (!nf_nat_used_tuple(tuple, ct)) |
227 | return; | 234 | return; |
@@ -266,7 +273,6 @@ nf_nat_setup_info(struct nf_conn *ct, | |||
266 | struct net *net = nf_ct_net(ct); | 273 | struct net *net = nf_ct_net(ct); |
267 | struct nf_conntrack_tuple curr_tuple, new_tuple; | 274 | struct nf_conntrack_tuple curr_tuple, new_tuple; |
268 | struct nf_conn_nat *nat; | 275 | struct nf_conn_nat *nat; |
269 | int have_to_hash = !(ct->status & IPS_NAT_DONE_MASK); | ||
270 | 276 | ||
271 | /* nat helper or nfctnetlink also setup binding */ | 277 | /* nat helper or nfctnetlink also setup binding */ |
272 | nat = nfct_nat(ct); | 278 | nat = nfct_nat(ct); |
@@ -306,8 +312,7 @@ nf_nat_setup_info(struct nf_conn *ct, | |||
306 | ct->status |= IPS_DST_NAT; | 312 | ct->status |= IPS_DST_NAT; |
307 | } | 313 | } |
308 | 314 | ||
309 | /* Place in source hash if this is the first time. */ | 315 | if (maniptype == IP_NAT_MANIP_SRC) { |
310 | if (have_to_hash) { | ||
311 | unsigned int srchash; | 316 | unsigned int srchash; |
312 | 317 | ||
313 | srchash = hash_by_src(net, nf_ct_zone(ct), | 318 | srchash = hash_by_src(net, nf_ct_zone(ct), |
@@ -323,9 +328,9 @@ nf_nat_setup_info(struct nf_conn *ct, | |||
323 | 328 | ||
324 | /* It's done. */ | 329 | /* It's done. */ |
325 | if (maniptype == IP_NAT_MANIP_DST) | 330 | if (maniptype == IP_NAT_MANIP_DST) |
326 | set_bit(IPS_DST_NAT_DONE_BIT, &ct->status); | 331 | ct->status |= IPS_DST_NAT_DONE; |
327 | else | 332 | else |
328 | set_bit(IPS_SRC_NAT_DONE_BIT, &ct->status); | 333 | ct->status |= IPS_SRC_NAT_DONE; |
329 | 334 | ||
330 | return NF_ACCEPT; | 335 | return NF_ACCEPT; |
331 | } | 336 | } |
@@ -502,7 +507,10 @@ int nf_nat_protocol_register(const struct nf_nat_protocol *proto) | |||
502 | int ret = 0; | 507 | int ret = 0; |
503 | 508 | ||
504 | spin_lock_bh(&nf_nat_lock); | 509 | spin_lock_bh(&nf_nat_lock); |
505 | if (nf_nat_protos[proto->protonum] != &nf_nat_unknown_protocol) { | 510 | if (rcu_dereference_protected( |
511 | nf_nat_protos[proto->protonum], | ||
512 | lockdep_is_held(&nf_nat_lock) | ||
513 | ) != &nf_nat_unknown_protocol) { | ||
506 | ret = -EBUSY; | 514 | ret = -EBUSY; |
507 | goto out; | 515 | goto out; |
508 | } | 516 | } |
@@ -532,7 +540,7 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *ct) | |||
532 | if (nat == NULL || nat->ct == NULL) | 540 | if (nat == NULL || nat->ct == NULL) |
533 | return; | 541 | return; |
534 | 542 | ||
535 | NF_CT_ASSERT(nat->ct->status & IPS_NAT_DONE_MASK); | 543 | NF_CT_ASSERT(nat->ct->status & IPS_SRC_NAT_DONE); |
536 | 544 | ||
537 | spin_lock_bh(&nf_nat_lock); | 545 | spin_lock_bh(&nf_nat_lock); |
538 | hlist_del_rcu(&nat->bysource); | 546 | hlist_del_rcu(&nat->bysource); |
@@ -545,11 +553,10 @@ static void nf_nat_move_storage(void *new, void *old) | |||
545 | struct nf_conn_nat *old_nat = old; | 553 | struct nf_conn_nat *old_nat = old; |
546 | struct nf_conn *ct = old_nat->ct; | 554 | struct nf_conn *ct = old_nat->ct; |
547 | 555 | ||
548 | if (!ct || !(ct->status & IPS_NAT_DONE_MASK)) | 556 | if (!ct || !(ct->status & IPS_SRC_NAT_DONE)) |
549 | return; | 557 | return; |
550 | 558 | ||
551 | spin_lock_bh(&nf_nat_lock); | 559 | spin_lock_bh(&nf_nat_lock); |
552 | new_nat->ct = ct; | ||
553 | hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource); | 560 | hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource); |
554 | spin_unlock_bh(&nf_nat_lock); | 561 | spin_unlock_bh(&nf_nat_lock); |
555 | } | 562 | } |
@@ -679,8 +686,7 @@ static int __net_init nf_nat_net_init(struct net *net) | |||
679 | { | 686 | { |
680 | /* Leave them the same for the moment. */ | 687 | /* Leave them the same for the moment. */ |
681 | net->ipv4.nat_htable_size = net->ct.htable_size; | 688 | net->ipv4.nat_htable_size = net->ct.htable_size; |
682 | net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, | 689 | net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, 0); |
683 | &net->ipv4.nat_vmalloced, 0); | ||
684 | if (!net->ipv4.nat_bysource) | 690 | if (!net->ipv4.nat_bysource) |
685 | return -ENOMEM; | 691 | return -ENOMEM; |
686 | return 0; | 692 | return 0; |
@@ -702,8 +708,7 @@ static void __net_exit nf_nat_net_exit(struct net *net) | |||
702 | { | 708 | { |
703 | nf_ct_iterate_cleanup(net, &clean_nat, NULL); | 709 | nf_ct_iterate_cleanup(net, &clean_nat, NULL); |
704 | synchronize_rcu(); | 710 | synchronize_rcu(); |
705 | nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced, | 711 | nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_htable_size); |
706 | net->ipv4.nat_htable_size); | ||
707 | } | 712 | } |
708 | 713 | ||
709 | static struct pernet_operations nf_nat_net_ops = { | 714 | static struct pernet_operations nf_nat_net_ops = { |
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index ee5f419d0a56..8812a02078ab 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c | |||
@@ -54,6 +54,7 @@ | |||
54 | #include <net/netfilter/nf_conntrack_expect.h> | 54 | #include <net/netfilter/nf_conntrack_expect.h> |
55 | #include <net/netfilter/nf_conntrack_helper.h> | 55 | #include <net/netfilter/nf_conntrack_helper.h> |
56 | #include <net/netfilter/nf_nat_helper.h> | 56 | #include <net/netfilter/nf_nat_helper.h> |
57 | #include <linux/netfilter/nf_conntrack_snmp.h> | ||
57 | 58 | ||
58 | MODULE_LICENSE("GPL"); | 59 | MODULE_LICENSE("GPL"); |
59 | MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); | 60 | MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); |
@@ -1310,9 +1311,9 @@ static int __init nf_nat_snmp_basic_init(void) | |||
1310 | { | 1311 | { |
1311 | int ret = 0; | 1312 | int ret = 0; |
1312 | 1313 | ||
1313 | ret = nf_conntrack_helper_register(&snmp_helper); | 1314 | BUG_ON(nf_nat_snmp_hook != NULL); |
1314 | if (ret < 0) | 1315 | rcu_assign_pointer(nf_nat_snmp_hook, help); |
1315 | return ret; | 1316 | |
1316 | ret = nf_conntrack_helper_register(&snmp_trap_helper); | 1317 | ret = nf_conntrack_helper_register(&snmp_trap_helper); |
1317 | if (ret < 0) { | 1318 | if (ret < 0) { |
1318 | nf_conntrack_helper_unregister(&snmp_helper); | 1319 | nf_conntrack_helper_unregister(&snmp_helper); |
@@ -1323,7 +1324,7 @@ static int __init nf_nat_snmp_basic_init(void) | |||
1323 | 1324 | ||
1324 | static void __exit nf_nat_snmp_basic_fini(void) | 1325 | static void __exit nf_nat_snmp_basic_fini(void) |
1325 | { | 1326 | { |
1326 | nf_conntrack_helper_unregister(&snmp_helper); | 1327 | rcu_assign_pointer(nf_nat_snmp_hook, NULL); |
1327 | nf_conntrack_helper_unregister(&snmp_trap_helper); | 1328 | nf_conntrack_helper_unregister(&snmp_trap_helper); |
1328 | } | 1329 | } |
1329 | 1330 | ||
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 351dc4e85242..3e5b7cc2db4f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -514,7 +514,7 @@ static const struct file_operations rt_cpu_seq_fops = { | |||
514 | .release = seq_release, | 514 | .release = seq_release, |
515 | }; | 515 | }; |
516 | 516 | ||
517 | #ifdef CONFIG_NET_CLS_ROUTE | 517 | #ifdef CONFIG_IP_ROUTE_CLASSID |
518 | static int rt_acct_proc_show(struct seq_file *m, void *v) | 518 | static int rt_acct_proc_show(struct seq_file *m, void *v) |
519 | { | 519 | { |
520 | struct ip_rt_acct *dst, *src; | 520 | struct ip_rt_acct *dst, *src; |
@@ -567,14 +567,14 @@ static int __net_init ip_rt_do_proc_init(struct net *net) | |||
567 | if (!pde) | 567 | if (!pde) |
568 | goto err2; | 568 | goto err2; |
569 | 569 | ||
570 | #ifdef CONFIG_NET_CLS_ROUTE | 570 | #ifdef CONFIG_IP_ROUTE_CLASSID |
571 | pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops); | 571 | pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops); |
572 | if (!pde) | 572 | if (!pde) |
573 | goto err3; | 573 | goto err3; |
574 | #endif | 574 | #endif |
575 | return 0; | 575 | return 0; |
576 | 576 | ||
577 | #ifdef CONFIG_NET_CLS_ROUTE | 577 | #ifdef CONFIG_IP_ROUTE_CLASSID |
578 | err3: | 578 | err3: |
579 | remove_proc_entry("rt_cache", net->proc_net_stat); | 579 | remove_proc_entry("rt_cache", net->proc_net_stat); |
580 | #endif | 580 | #endif |
@@ -588,7 +588,7 @@ static void __net_exit ip_rt_do_proc_exit(struct net *net) | |||
588 | { | 588 | { |
589 | remove_proc_entry("rt_cache", net->proc_net_stat); | 589 | remove_proc_entry("rt_cache", net->proc_net_stat); |
590 | remove_proc_entry("rt_cache", net->proc_net); | 590 | remove_proc_entry("rt_cache", net->proc_net); |
591 | #ifdef CONFIG_NET_CLS_ROUTE | 591 | #ifdef CONFIG_IP_ROUTE_CLASSID |
592 | remove_proc_entry("rt_acct", net->proc_net); | 592 | remove_proc_entry("rt_acct", net->proc_net); |
593 | #endif | 593 | #endif |
594 | } | 594 | } |
@@ -1775,7 +1775,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt) | |||
1775 | memcpy(addr, &src, 4); | 1775 | memcpy(addr, &src, 4); |
1776 | } | 1776 | } |
1777 | 1777 | ||
1778 | #ifdef CONFIG_NET_CLS_ROUTE | 1778 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1779 | static void set_class_tag(struct rtable *rt, u32 tag) | 1779 | static void set_class_tag(struct rtable *rt, u32 tag) |
1780 | { | 1780 | { |
1781 | if (!(rt->dst.tclassid & 0xFFFF)) | 1781 | if (!(rt->dst.tclassid & 0xFFFF)) |
@@ -1825,7 +1825,7 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) | |||
1825 | FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) | 1825 | FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) |
1826 | rt->rt_gateway = FIB_RES_GW(*res); | 1826 | rt->rt_gateway = FIB_RES_GW(*res); |
1827 | dst_import_metrics(dst, fi->fib_metrics); | 1827 | dst_import_metrics(dst, fi->fib_metrics); |
1828 | #ifdef CONFIG_NET_CLS_ROUTE | 1828 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1829 | dst->tclassid = FIB_RES_NH(*res).nh_tclassid; | 1829 | dst->tclassid = FIB_RES_NH(*res).nh_tclassid; |
1830 | #endif | 1830 | #endif |
1831 | } | 1831 | } |
@@ -1835,7 +1835,7 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) | |||
1835 | if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40) | 1835 | if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40) |
1836 | dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40); | 1836 | dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40); |
1837 | 1837 | ||
1838 | #ifdef CONFIG_NET_CLS_ROUTE | 1838 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1839 | #ifdef CONFIG_IP_MULTIPLE_TABLES | 1839 | #ifdef CONFIG_IP_MULTIPLE_TABLES |
1840 | set_class_tag(rt, fib_rules_tclass(res)); | 1840 | set_class_tag(rt, fib_rules_tclass(res)); |
1841 | #endif | 1841 | #endif |
@@ -1891,7 +1891,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
1891 | rth->fl.mark = skb->mark; | 1891 | rth->fl.mark = skb->mark; |
1892 | rth->fl.fl4_src = saddr; | 1892 | rth->fl.fl4_src = saddr; |
1893 | rth->rt_src = saddr; | 1893 | rth->rt_src = saddr; |
1894 | #ifdef CONFIG_NET_CLS_ROUTE | 1894 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1895 | rth->dst.tclassid = itag; | 1895 | rth->dst.tclassid = itag; |
1896 | #endif | 1896 | #endif |
1897 | rth->rt_iif = | 1897 | rth->rt_iif = |
@@ -2208,7 +2208,7 @@ local_input: | |||
2208 | rth->fl.mark = skb->mark; | 2208 | rth->fl.mark = skb->mark; |
2209 | rth->fl.fl4_src = saddr; | 2209 | rth->fl.fl4_src = saddr; |
2210 | rth->rt_src = saddr; | 2210 | rth->rt_src = saddr; |
2211 | #ifdef CONFIG_NET_CLS_ROUTE | 2211 | #ifdef CONFIG_IP_ROUTE_CLASSID |
2212 | rth->dst.tclassid = itag; | 2212 | rth->dst.tclassid = itag; |
2213 | #endif | 2213 | #endif |
2214 | rth->rt_iif = | 2214 | rth->rt_iif = |
@@ -2828,7 +2828,7 @@ static int rt_fill_info(struct net *net, | |||
2828 | } | 2828 | } |
2829 | if (rt->dst.dev) | 2829 | if (rt->dst.dev) |
2830 | NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex); | 2830 | NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex); |
2831 | #ifdef CONFIG_NET_CLS_ROUTE | 2831 | #ifdef CONFIG_IP_ROUTE_CLASSID |
2832 | if (rt->dst.tclassid) | 2832 | if (rt->dst.tclassid) |
2833 | NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid); | 2833 | NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid); |
2834 | #endif | 2834 | #endif |
@@ -3249,9 +3249,9 @@ static __net_initdata struct pernet_operations rt_genid_ops = { | |||
3249 | }; | 3249 | }; |
3250 | 3250 | ||
3251 | 3251 | ||
3252 | #ifdef CONFIG_NET_CLS_ROUTE | 3252 | #ifdef CONFIG_IP_ROUTE_CLASSID |
3253 | struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; | 3253 | struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; |
3254 | #endif /* CONFIG_NET_CLS_ROUTE */ | 3254 | #endif /* CONFIG_IP_ROUTE_CLASSID */ |
3255 | 3255 | ||
3256 | static __initdata unsigned long rhash_entries; | 3256 | static __initdata unsigned long rhash_entries; |
3257 | static int __init set_rhash_entries(char *str) | 3257 | static int __init set_rhash_entries(char *str) |
@@ -3267,7 +3267,7 @@ int __init ip_rt_init(void) | |||
3267 | { | 3267 | { |
3268 | int rc = 0; | 3268 | int rc = 0; |
3269 | 3269 | ||
3270 | #ifdef CONFIG_NET_CLS_ROUTE | 3270 | #ifdef CONFIG_IP_ROUTE_CLASSID |
3271 | ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); | 3271 | ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); |
3272 | if (!ip_rt_acct) | 3272 | if (!ip_rt_acct) |
3273 | panic("IP: failed to allocate ip_rt_acct\n"); | 3273 | panic("IP: failed to allocate ip_rt_acct\n"); |
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 7d227c644f72..47b7b8df7fac 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c | |||
@@ -1076,6 +1076,7 @@ static int compat_table_info(const struct xt_table_info *info, | |||
1076 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); | 1076 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); |
1077 | newinfo->initial_entries = 0; | 1077 | newinfo->initial_entries = 0; |
1078 | loc_cpu_entry = info->entries[raw_smp_processor_id()]; | 1078 | loc_cpu_entry = info->entries[raw_smp_processor_id()]; |
1079 | xt_compat_init_offsets(AF_INET6, info->number); | ||
1079 | xt_entry_foreach(iter, loc_cpu_entry, info->size) { | 1080 | xt_entry_foreach(iter, loc_cpu_entry, info->size) { |
1080 | ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); | 1081 | ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); |
1081 | if (ret != 0) | 1082 | if (ret != 0) |
@@ -1679,6 +1680,7 @@ translate_compat_table(struct net *net, | |||
1679 | duprintf("translate_compat_table: size %u\n", info->size); | 1680 | duprintf("translate_compat_table: size %u\n", info->size); |
1680 | j = 0; | 1681 | j = 0; |
1681 | xt_compat_lock(AF_INET6); | 1682 | xt_compat_lock(AF_INET6); |
1683 | xt_compat_init_offsets(AF_INET6, number); | ||
1682 | /* Walk through entries, checking offsets. */ | 1684 | /* Walk through entries, checking offsets. */ |
1683 | xt_entry_foreach(iter0, entry0, total_size) { | 1685 | xt_entry_foreach(iter0, entry0, total_size) { |
1684 | ret = check_compat_entry_size_and_hooks(iter0, info, &size, | 1686 | ret = check_compat_entry_size_and_hooks(iter0, info, &size, |
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index 09c88891a753..05027b753721 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c | |||
@@ -452,8 +452,7 @@ ip6t_log_packet(u_int8_t pf, | |||
452 | in ? in->name : "", | 452 | in ? in->name : "", |
453 | out ? out->name : ""); | 453 | out ? out->name : ""); |
454 | 454 | ||
455 | /* MAC logging for input path only. */ | 455 | if (in != NULL) |
456 | if (in && !out) | ||
457 | dump_mac_header(m, loginfo, skb); | 456 | dump_mac_header(m, loginfo, skb); |
458 | 457 | ||
459 | dump_packet(m, loginfo, skb, skb_network_offset(skb), 1); | 458 | dump_packet(m, loginfo, skb, skb_network_offset(skb), 1); |
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 79d43aa8fa8d..085727263812 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c | |||
@@ -45,6 +45,7 @@ | |||
45 | #include <linux/netfilter_ipv6.h> | 45 | #include <linux/netfilter_ipv6.h> |
46 | #include <linux/kernel.h> | 46 | #include <linux/kernel.h> |
47 | #include <linux/module.h> | 47 | #include <linux/module.h> |
48 | #include <net/netfilter/ipv6/nf_defrag_ipv6.h> | ||
48 | 49 | ||
49 | 50 | ||
50 | struct nf_ct_frag6_skb_cb | 51 | struct nf_ct_frag6_skb_cb |
@@ -73,7 +74,7 @@ static struct inet_frags nf_frags; | |||
73 | static struct netns_frags nf_init_frags; | 74 | static struct netns_frags nf_init_frags; |
74 | 75 | ||
75 | #ifdef CONFIG_SYSCTL | 76 | #ifdef CONFIG_SYSCTL |
76 | struct ctl_table nf_ct_frag6_sysctl_table[] = { | 77 | static struct ctl_table nf_ct_frag6_sysctl_table[] = { |
77 | { | 78 | { |
78 | .procname = "nf_conntrack_frag6_timeout", | 79 | .procname = "nf_conntrack_frag6_timeout", |
79 | .data = &nf_init_frags.timeout, | 80 | .data = &nf_init_frags.timeout, |
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 86c39526ba5e..2bc6cd7bb8ec 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c | |||
@@ -123,18 +123,18 @@ static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb) | |||
123 | } | 123 | } |
124 | 124 | ||
125 | #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) | 125 | #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) |
126 | static int (*mh_filter)(struct sock *sock, struct sk_buff *skb); | 126 | typedef int mh_filter_t(struct sock *sock, struct sk_buff *skb); |
127 | 127 | ||
128 | int rawv6_mh_filter_register(int (*filter)(struct sock *sock, | 128 | static mh_filter_t __rcu *mh_filter __read_mostly; |
129 | struct sk_buff *skb)) | 129 | |
130 | int rawv6_mh_filter_register(mh_filter_t filter) | ||
130 | { | 131 | { |
131 | rcu_assign_pointer(mh_filter, filter); | 132 | rcu_assign_pointer(mh_filter, filter); |
132 | return 0; | 133 | return 0; |
133 | } | 134 | } |
134 | EXPORT_SYMBOL(rawv6_mh_filter_register); | 135 | EXPORT_SYMBOL(rawv6_mh_filter_register); |
135 | 136 | ||
136 | int rawv6_mh_filter_unregister(int (*filter)(struct sock *sock, | 137 | int rawv6_mh_filter_unregister(mh_filter_t filter) |
137 | struct sk_buff *skb)) | ||
138 | { | 138 | { |
139 | rcu_assign_pointer(mh_filter, NULL); | 139 | rcu_assign_pointer(mh_filter, NULL); |
140 | synchronize_rcu(); | 140 | synchronize_rcu(); |
@@ -192,10 +192,10 @@ static int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) | |||
192 | * policy is placed in rawv6_rcv() because it is | 192 | * policy is placed in rawv6_rcv() because it is |
193 | * required for each socket. | 193 | * required for each socket. |
194 | */ | 194 | */ |
195 | int (*filter)(struct sock *sock, struct sk_buff *skb); | 195 | mh_filter_t *filter; |
196 | 196 | ||
197 | filter = rcu_dereference(mh_filter); | 197 | filter = rcu_dereference(mh_filter); |
198 | filtered = filter ? filter(sk, skb) : 0; | 198 | filtered = filter ? (*filter)(sk, skb) : 0; |
199 | break; | 199 | break; |
200 | } | 200 | } |
201 | #endif | 201 | #endif |
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 8ce38f10a547..b1599a345c10 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c | |||
@@ -412,7 +412,7 @@ static void prl_list_destroy_rcu(struct rcu_head *head) | |||
412 | 412 | ||
413 | p = container_of(head, struct ip_tunnel_prl_entry, rcu_head); | 413 | p = container_of(head, struct ip_tunnel_prl_entry, rcu_head); |
414 | do { | 414 | do { |
415 | n = p->next; | 415 | n = rcu_dereference_protected(p->next, 1); |
416 | kfree(p); | 416 | kfree(p); |
417 | p = n; | 417 | p = n; |
418 | } while (p); | 418 | } while (p); |
@@ -421,15 +421,17 @@ static void prl_list_destroy_rcu(struct rcu_head *head) | |||
421 | static int | 421 | static int |
422 | ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a) | 422 | ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a) |
423 | { | 423 | { |
424 | struct ip_tunnel_prl_entry *x, **p; | 424 | struct ip_tunnel_prl_entry *x; |
425 | struct ip_tunnel_prl_entry __rcu **p; | ||
425 | int err = 0; | 426 | int err = 0; |
426 | 427 | ||
427 | ASSERT_RTNL(); | 428 | ASSERT_RTNL(); |
428 | 429 | ||
429 | if (a && a->addr != htonl(INADDR_ANY)) { | 430 | if (a && a->addr != htonl(INADDR_ANY)) { |
430 | for (p = &t->prl; *p; p = &(*p)->next) { | 431 | for (p = &t->prl; |
431 | if ((*p)->addr == a->addr) { | 432 | (x = rtnl_dereference(*p)) != NULL; |
432 | x = *p; | 433 | p = &x->next) { |
434 | if (x->addr == a->addr) { | ||
433 | *p = x->next; | 435 | *p = x->next; |
434 | call_rcu(&x->rcu_head, prl_entry_destroy_rcu); | 436 | call_rcu(&x->rcu_head, prl_entry_destroy_rcu); |
435 | t->prl_count--; | 437 | t->prl_count--; |
@@ -438,9 +440,9 @@ ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a) | |||
438 | } | 440 | } |
439 | err = -ENXIO; | 441 | err = -ENXIO; |
440 | } else { | 442 | } else { |
441 | if (t->prl) { | 443 | x = rtnl_dereference(t->prl); |
444 | if (x) { | ||
442 | t->prl_count = 0; | 445 | t->prl_count = 0; |
443 | x = t->prl; | ||
444 | call_rcu(&x->rcu_head, prl_list_destroy_rcu); | 446 | call_rcu(&x->rcu_head, prl_list_destroy_rcu); |
445 | t->prl = NULL; | 447 | t->prl = NULL; |
446 | } | 448 | } |
@@ -1179,7 +1181,7 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) | |||
1179 | if (!dev->tstats) | 1181 | if (!dev->tstats) |
1180 | return -ENOMEM; | 1182 | return -ENOMEM; |
1181 | dev_hold(dev); | 1183 | dev_hold(dev); |
1182 | sitn->tunnels_wc[0] = tunnel; | 1184 | rcu_assign_pointer(sitn->tunnels_wc[0], tunnel); |
1183 | return 0; | 1185 | return 0; |
1184 | } | 1186 | } |
1185 | 1187 | ||
@@ -1196,11 +1198,12 @@ static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_hea | |||
1196 | for (prio = 1; prio < 4; prio++) { | 1198 | for (prio = 1; prio < 4; prio++) { |
1197 | int h; | 1199 | int h; |
1198 | for (h = 0; h < HASH_SIZE; h++) { | 1200 | for (h = 0; h < HASH_SIZE; h++) { |
1199 | struct ip_tunnel *t = sitn->tunnels[prio][h]; | 1201 | struct ip_tunnel *t; |
1200 | 1202 | ||
1203 | t = rtnl_dereference(sitn->tunnels[prio][h]); | ||
1201 | while (t != NULL) { | 1204 | while (t != NULL) { |
1202 | unregister_netdevice_queue(t->dev, head); | 1205 | unregister_netdevice_queue(t->dev, head); |
1203 | t = t->next; | 1206 | t = rtnl_dereference(t->next); |
1204 | } | 1207 | } |
1205 | } | 1208 | } |
1206 | } | 1209 | } |
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 1534f2b44caf..faf7412ea453 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig | |||
@@ -85,6 +85,17 @@ config NF_CONNTRACK_EVENTS | |||
85 | 85 | ||
86 | If unsure, say `N'. | 86 | If unsure, say `N'. |
87 | 87 | ||
88 | config NF_CONNTRACK_TIMESTAMP | ||
89 | bool 'Connection tracking timestamping' | ||
90 | depends on NETFILTER_ADVANCED | ||
91 | help | ||
92 | This option enables support for connection tracking timestamping. | ||
93 | This allows you to store the flow start-time and to obtain | ||
94 | the flow-stop time (once it has been destroyed) via Connection | ||
95 | tracking events. | ||
96 | |||
97 | If unsure, say `N'. | ||
98 | |||
88 | config NF_CT_PROTO_DCCP | 99 | config NF_CT_PROTO_DCCP |
89 | tristate 'DCCP protocol connection tracking support (EXPERIMENTAL)' | 100 | tristate 'DCCP protocol connection tracking support (EXPERIMENTAL)' |
90 | depends on EXPERIMENTAL | 101 | depends on EXPERIMENTAL |
@@ -185,9 +196,13 @@ config NF_CONNTRACK_IRC | |||
185 | 196 | ||
186 | To compile it as a module, choose M here. If unsure, say N. | 197 | To compile it as a module, choose M here. If unsure, say N. |
187 | 198 | ||
199 | config NF_CONNTRACK_BROADCAST | ||
200 | tristate | ||
201 | |||
188 | config NF_CONNTRACK_NETBIOS_NS | 202 | config NF_CONNTRACK_NETBIOS_NS |
189 | tristate "NetBIOS name service protocol support" | 203 | tristate "NetBIOS name service protocol support" |
190 | depends on NETFILTER_ADVANCED | 204 | depends on NETFILTER_ADVANCED |
205 | select NF_CONNTRACK_BROADCAST | ||
191 | help | 206 | help |
192 | NetBIOS name service requests are sent as broadcast messages from an | 207 | NetBIOS name service requests are sent as broadcast messages from an |
193 | unprivileged port and responded to with unicast messages to the | 208 | unprivileged port and responded to with unicast messages to the |
@@ -204,6 +219,21 @@ config NF_CONNTRACK_NETBIOS_NS | |||
204 | 219 | ||
205 | To compile it as a module, choose M here. If unsure, say N. | 220 | To compile it as a module, choose M here. If unsure, say N. |
206 | 221 | ||
222 | config NF_CONNTRACK_SNMP | ||
223 | tristate "SNMP service protocol support" | ||
224 | depends on NETFILTER_ADVANCED | ||
225 | select NF_CONNTRACK_BROADCAST | ||
226 | help | ||
227 | SNMP service requests are sent as broadcast messages from an | ||
228 | unprivileged port and responded to with unicast messages to the | ||
229 | same port. This makes them hard to firewall properly because connection | |
230 | tracking doesn't deal with broadcasts. This helper tracks locally | ||
231 | originating SNMP service requests and the corresponding | ||
232 | responses. It relies on correct IP address configuration, specifically | ||
233 | netmask and broadcast address. | ||
234 | |||
235 | To compile it as a module, choose M here. If unsure, say N. | ||
236 | |||
207 | config NF_CONNTRACK_PPTP | 237 | config NF_CONNTRACK_PPTP |
208 | tristate "PPtP protocol support" | 238 | tristate "PPtP protocol support" |
209 | depends on NETFILTER_ADVANCED | 239 | depends on NETFILTER_ADVANCED |
@@ -326,6 +356,16 @@ config NETFILTER_XT_CONNMARK | |||
326 | 356 | ||
327 | comment "Xtables targets" | 357 | comment "Xtables targets" |
328 | 358 | ||
359 | config NETFILTER_XT_TARGET_AUDIT | ||
360 | tristate "AUDIT target support" | ||
361 | depends on AUDIT | ||
362 | depends on NETFILTER_ADVANCED | ||
363 | ---help--- | ||
364 | This option adds an 'AUDIT' target, which can be used to create | |
365 | audit records for packets dropped/accepted. | ||
366 | |||
367 | To compile it as a module, choose M here. If unsure, say N. | |
368 | |||
329 | config NETFILTER_XT_TARGET_CHECKSUM | 369 | config NETFILTER_XT_TARGET_CHECKSUM |
330 | tristate "CHECKSUM target support" | 370 | tristate "CHECKSUM target support" |
331 | depends on IP_NF_MANGLE || IP6_NF_MANGLE | 371 | depends on IP_NF_MANGLE || IP6_NF_MANGLE |
@@ -477,6 +517,7 @@ config NETFILTER_XT_TARGET_NFLOG | |||
477 | config NETFILTER_XT_TARGET_NFQUEUE | 517 | config NETFILTER_XT_TARGET_NFQUEUE |
478 | tristate '"NFQUEUE" target Support' | 518 | tristate '"NFQUEUE" target Support' |
479 | depends on NETFILTER_ADVANCED | 519 | depends on NETFILTER_ADVANCED |
520 | select NETFILTER_NETLINK_QUEUE | ||
480 | help | 521 | help |
481 | This target replaced the old obsolete QUEUE target. | 522 | This target replaced the old obsolete QUEUE target. |
482 | 523 | ||
@@ -886,7 +927,7 @@ config NETFILTER_XT_MATCH_RATEEST | |||
886 | config NETFILTER_XT_MATCH_REALM | 927 | config NETFILTER_XT_MATCH_REALM |
887 | tristate '"realm" match support' | 928 | tristate '"realm" match support' |
888 | depends on NETFILTER_ADVANCED | 929 | depends on NETFILTER_ADVANCED |
889 | select NET_CLS_ROUTE | 930 | select IP_ROUTE_CLASSID |
890 | help | 931 | help |
891 | This option adds a `realm' match, which allows you to use the realm | 932 | This option adds a `realm' match, which allows you to use the realm |
892 | key from the routing subsystem inside iptables. | 933 | key from the routing subsystem inside iptables. |
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 441050f31111..9ae6878a85b1 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile | |||
@@ -1,6 +1,7 @@ | |||
1 | netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o | 1 | netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o |
2 | 2 | ||
3 | nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o | 3 | nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o |
4 | nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o | ||
4 | nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o | 5 | nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o |
5 | 6 | ||
6 | obj-$(CONFIG_NETFILTER) = netfilter.o | 7 | obj-$(CONFIG_NETFILTER) = netfilter.o |
@@ -28,7 +29,9 @@ obj-$(CONFIG_NF_CONNTRACK_AMANDA) += nf_conntrack_amanda.o | |||
28 | obj-$(CONFIG_NF_CONNTRACK_FTP) += nf_conntrack_ftp.o | 29 | obj-$(CONFIG_NF_CONNTRACK_FTP) += nf_conntrack_ftp.o |
29 | obj-$(CONFIG_NF_CONNTRACK_H323) += nf_conntrack_h323.o | 30 | obj-$(CONFIG_NF_CONNTRACK_H323) += nf_conntrack_h323.o |
30 | obj-$(CONFIG_NF_CONNTRACK_IRC) += nf_conntrack_irc.o | 31 | obj-$(CONFIG_NF_CONNTRACK_IRC) += nf_conntrack_irc.o |
32 | obj-$(CONFIG_NF_CONNTRACK_BROADCAST) += nf_conntrack_broadcast.o | ||
31 | obj-$(CONFIG_NF_CONNTRACK_NETBIOS_NS) += nf_conntrack_netbios_ns.o | 33 | obj-$(CONFIG_NF_CONNTRACK_NETBIOS_NS) += nf_conntrack_netbios_ns.o |
34 | obj-$(CONFIG_NF_CONNTRACK_SNMP) += nf_conntrack_snmp.o | ||
32 | obj-$(CONFIG_NF_CONNTRACK_PPTP) += nf_conntrack_pptp.o | 35 | obj-$(CONFIG_NF_CONNTRACK_PPTP) += nf_conntrack_pptp.o |
33 | obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o | 36 | obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o |
34 | obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o | 37 | obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o |
@@ -45,6 +48,7 @@ obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o | |||
45 | obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o | 48 | obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o |
46 | 49 | ||
47 | # targets | 50 | # targets |
51 | obj-$(CONFIG_NETFILTER_XT_TARGET_AUDIT) += xt_AUDIT.o | ||
48 | obj-$(CONFIG_NETFILTER_XT_TARGET_CHECKSUM) += xt_CHECKSUM.o | 52 | obj-$(CONFIG_NETFILTER_XT_TARGET_CHECKSUM) += xt_CHECKSUM.o |
49 | obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o | 53 | obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o |
50 | obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o | 54 | obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o |
diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 32fcbe290c04..1e00bf7d27c5 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c | |||
@@ -175,13 +175,21 @@ next_hook: | |||
175 | ret = 1; | 175 | ret = 1; |
176 | } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) { | 176 | } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) { |
177 | kfree_skb(skb); | 177 | kfree_skb(skb); |
178 | ret = -(verdict >> NF_VERDICT_BITS); | 178 | ret = NF_DROP_GETERR(verdict); |
179 | if (ret == 0) | 179 | if (ret == 0) |
180 | ret = -EPERM; | 180 | ret = -EPERM; |
181 | } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { | 181 | } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { |
182 | if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn, | 182 | ret = nf_queue(skb, elem, pf, hook, indev, outdev, okfn, |
183 | verdict >> NF_VERDICT_BITS)) | 183 | verdict >> NF_VERDICT_QBITS); |
184 | goto next_hook; | 184 | if (ret < 0) { |
185 | if (ret == -ECANCELED) | ||
186 | goto next_hook; | ||
187 | if (ret == -ESRCH && | ||
188 | (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) | ||
189 | goto next_hook; | ||
190 | kfree_skb(skb); | ||
191 | } | ||
192 | ret = 0; | ||
185 | } | 193 | } |
186 | rcu_read_unlock(); | 194 | rcu_read_unlock(); |
187 | return ret; | 195 | return ret; |
@@ -214,7 +222,7 @@ EXPORT_SYMBOL(skb_make_writable); | |||
214 | /* This does not belong here, but locally generated errors need it if connection | 222 | /* This does not belong here, but locally generated errors need it if connection |
215 | tracking in use: without this, connection may not be in hash table, and hence | 223 | tracking in use: without this, connection may not be in hash table, and hence |
216 | manufactured ICMP or RST packets will not be associated with it. */ | 224 | manufactured ICMP or RST packets will not be associated with it. */ |
217 | void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *); | 225 | void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *) __rcu __read_mostly; |
218 | EXPORT_SYMBOL(ip_ct_attach); | 226 | EXPORT_SYMBOL(ip_ct_attach); |
219 | 227 | ||
220 | void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) | 228 | void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) |
@@ -231,7 +239,7 @@ void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) | |||
231 | } | 239 | } |
232 | EXPORT_SYMBOL(nf_ct_attach); | 240 | EXPORT_SYMBOL(nf_ct_attach); |
233 | 241 | ||
234 | void (*nf_ct_destroy)(struct nf_conntrack *); | 242 | void (*nf_ct_destroy)(struct nf_conntrack *) __rcu __read_mostly; |
235 | EXPORT_SYMBOL(nf_ct_destroy); | 243 | EXPORT_SYMBOL(nf_ct_destroy); |
236 | 244 | ||
237 | void nf_conntrack_destroy(struct nf_conntrack *nfct) | 245 | void nf_conntrack_destroy(struct nf_conntrack *nfct) |
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index a475edee0912..5c48ffb60c28 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c | |||
@@ -43,11 +43,6 @@ EXPORT_SYMBOL(register_ip_vs_app); | |||
43 | EXPORT_SYMBOL(unregister_ip_vs_app); | 43 | EXPORT_SYMBOL(unregister_ip_vs_app); |
44 | EXPORT_SYMBOL(register_ip_vs_app_inc); | 44 | EXPORT_SYMBOL(register_ip_vs_app_inc); |
45 | 45 | ||
46 | /* ipvs application list head */ | ||
47 | static LIST_HEAD(ip_vs_app_list); | ||
48 | static DEFINE_MUTEX(__ip_vs_app_mutex); | ||
49 | |||
50 | |||
51 | /* | 46 | /* |
52 | * Get an ip_vs_app object | 47 | * Get an ip_vs_app object |
53 | */ | 48 | */ |
@@ -67,7 +62,8 @@ static inline void ip_vs_app_put(struct ip_vs_app *app) | |||
67 | * Allocate/initialize app incarnation and register it in proto apps. | 62 | * Allocate/initialize app incarnation and register it in proto apps. |
68 | */ | 63 | */ |
69 | static int | 64 | static int |
70 | ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port) | 65 | ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto, |
66 | __u16 port) | ||
71 | { | 67 | { |
72 | struct ip_vs_protocol *pp; | 68 | struct ip_vs_protocol *pp; |
73 | struct ip_vs_app *inc; | 69 | struct ip_vs_app *inc; |
@@ -98,7 +94,7 @@ ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port) | |||
98 | } | 94 | } |
99 | } | 95 | } |
100 | 96 | ||
101 | ret = pp->register_app(inc); | 97 | ret = pp->register_app(net, inc); |
102 | if (ret) | 98 | if (ret) |
103 | goto out; | 99 | goto out; |
104 | 100 | ||
@@ -119,7 +115,7 @@ ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port) | |||
119 | * Release app incarnation | 115 | * Release app incarnation |
120 | */ | 116 | */ |
121 | static void | 117 | static void |
122 | ip_vs_app_inc_release(struct ip_vs_app *inc) | 118 | ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc) |
123 | { | 119 | { |
124 | struct ip_vs_protocol *pp; | 120 | struct ip_vs_protocol *pp; |
125 | 121 | ||
@@ -127,7 +123,7 @@ ip_vs_app_inc_release(struct ip_vs_app *inc) | |||
127 | return; | 123 | return; |
128 | 124 | ||
129 | if (pp->unregister_app) | 125 | if (pp->unregister_app) |
130 | pp->unregister_app(inc); | 126 | pp->unregister_app(net, inc); |
131 | 127 | ||
132 | IP_VS_DBG(9, "%s App %s:%u unregistered\n", | 128 | IP_VS_DBG(9, "%s App %s:%u unregistered\n", |
133 | pp->name, inc->name, ntohs(inc->port)); | 129 | pp->name, inc->name, ntohs(inc->port)); |
@@ -168,15 +164,17 @@ void ip_vs_app_inc_put(struct ip_vs_app *inc) | |||
168 | * Register an application incarnation in protocol applications | 164 | * Register an application incarnation in protocol applications |
169 | */ | 165 | */ |
170 | int | 166 | int |
171 | register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port) | 167 | register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto, |
168 | __u16 port) | ||
172 | { | 169 | { |
170 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
173 | int result; | 171 | int result; |
174 | 172 | ||
175 | mutex_lock(&__ip_vs_app_mutex); | 173 | mutex_lock(&ipvs->app_mutex); |
176 | 174 | ||
177 | result = ip_vs_app_inc_new(app, proto, port); | 175 | result = ip_vs_app_inc_new(net, app, proto, port); |
178 | 176 | ||
179 | mutex_unlock(&__ip_vs_app_mutex); | 177 | mutex_unlock(&ipvs->app_mutex); |
180 | 178 | ||
181 | return result; | 179 | return result; |
182 | } | 180 | } |
@@ -185,16 +183,17 @@ register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port) | |||
185 | /* | 183 | /* |
186 | * ip_vs_app registration routine | 184 | * ip_vs_app registration routine |
187 | */ | 185 | */ |
188 | int register_ip_vs_app(struct ip_vs_app *app) | 186 | int register_ip_vs_app(struct net *net, struct ip_vs_app *app) |
189 | { | 187 | { |
188 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
190 | /* increase the module use count */ | 189 | /* increase the module use count */ |
191 | ip_vs_use_count_inc(); | 190 | ip_vs_use_count_inc(); |
192 | 191 | ||
193 | mutex_lock(&__ip_vs_app_mutex); | 192 | mutex_lock(&ipvs->app_mutex); |
194 | 193 | ||
195 | list_add(&app->a_list, &ip_vs_app_list); | 194 | list_add(&app->a_list, &ipvs->app_list); |
196 | 195 | ||
197 | mutex_unlock(&__ip_vs_app_mutex); | 196 | mutex_unlock(&ipvs->app_mutex); |
198 | 197 | ||
199 | return 0; | 198 | return 0; |
200 | } | 199 | } |
@@ -204,19 +203,20 @@ int register_ip_vs_app(struct ip_vs_app *app) | |||
204 | * ip_vs_app unregistration routine | 203 | * ip_vs_app unregistration routine |
205 | * We are sure there are no app incarnations attached to services | 204 | * We are sure there are no app incarnations attached to services |
206 | */ | 205 | */ |
207 | void unregister_ip_vs_app(struct ip_vs_app *app) | 206 | void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app) |
208 | { | 207 | { |
208 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
209 | struct ip_vs_app *inc, *nxt; | 209 | struct ip_vs_app *inc, *nxt; |
210 | 210 | ||
211 | mutex_lock(&__ip_vs_app_mutex); | 211 | mutex_lock(&ipvs->app_mutex); |
212 | 212 | ||
213 | list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) { | 213 | list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) { |
214 | ip_vs_app_inc_release(inc); | 214 | ip_vs_app_inc_release(net, inc); |
215 | } | 215 | } |
216 | 216 | ||
217 | list_del(&app->a_list); | 217 | list_del(&app->a_list); |
218 | 218 | ||
219 | mutex_unlock(&__ip_vs_app_mutex); | 219 | mutex_unlock(&ipvs->app_mutex); |
220 | 220 | ||
221 | /* decrease the module use count */ | 221 | /* decrease the module use count */ |
222 | ip_vs_use_count_dec(); | 222 | ip_vs_use_count_dec(); |
@@ -226,7 +226,8 @@ void unregister_ip_vs_app(struct ip_vs_app *app) | |||
226 | /* | 226 | /* |
227 | * Bind ip_vs_conn to its ip_vs_app (called by cp constructor) | 227 | * Bind ip_vs_conn to its ip_vs_app (called by cp constructor) |
228 | */ | 228 | */ |
229 | int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp) | 229 | int ip_vs_bind_app(struct ip_vs_conn *cp, |
230 | struct ip_vs_protocol *pp) | ||
230 | { | 231 | { |
231 | return pp->app_conn_bind(cp); | 232 | return pp->app_conn_bind(cp); |
232 | } | 233 | } |
@@ -481,11 +482,11 @@ int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb) | |||
481 | * /proc/net/ip_vs_app entry function | 482 | * /proc/net/ip_vs_app entry function |
482 | */ | 483 | */ |
483 | 484 | ||
484 | static struct ip_vs_app *ip_vs_app_idx(loff_t pos) | 485 | static struct ip_vs_app *ip_vs_app_idx(struct netns_ipvs *ipvs, loff_t pos) |
485 | { | 486 | { |
486 | struct ip_vs_app *app, *inc; | 487 | struct ip_vs_app *app, *inc; |
487 | 488 | ||
488 | list_for_each_entry(app, &ip_vs_app_list, a_list) { | 489 | list_for_each_entry(app, &ipvs->app_list, a_list) { |
489 | list_for_each_entry(inc, &app->incs_list, a_list) { | 490 | list_for_each_entry(inc, &app->incs_list, a_list) { |
490 | if (pos-- == 0) | 491 | if (pos-- == 0) |
491 | return inc; | 492 | return inc; |
@@ -497,19 +498,24 @@ static struct ip_vs_app *ip_vs_app_idx(loff_t pos) | |||
497 | 498 | ||
498 | static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos) | 499 | static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos) |
499 | { | 500 | { |
500 | mutex_lock(&__ip_vs_app_mutex); | 501 | struct net *net = seq_file_net(seq); |
502 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
501 | 503 | ||
502 | return *pos ? ip_vs_app_idx(*pos - 1) : SEQ_START_TOKEN; | 504 | mutex_lock(&ipvs->app_mutex); |
505 | |||
506 | return *pos ? ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN; | ||
503 | } | 507 | } |
504 | 508 | ||
505 | static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 509 | static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
506 | { | 510 | { |
507 | struct ip_vs_app *inc, *app; | 511 | struct ip_vs_app *inc, *app; |
508 | struct list_head *e; | 512 | struct list_head *e; |
513 | struct net *net = seq_file_net(seq); | ||
514 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
509 | 515 | ||
510 | ++*pos; | 516 | ++*pos; |
511 | if (v == SEQ_START_TOKEN) | 517 | if (v == SEQ_START_TOKEN) |
512 | return ip_vs_app_idx(0); | 518 | return ip_vs_app_idx(ipvs, 0); |
513 | 519 | ||
514 | inc = v; | 520 | inc = v; |
515 | app = inc->app; | 521 | app = inc->app; |
@@ -518,7 +524,7 @@ static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
518 | return list_entry(e, struct ip_vs_app, a_list); | 524 | return list_entry(e, struct ip_vs_app, a_list); |
519 | 525 | ||
520 | /* go on to next application */ | 526 | /* go on to next application */ |
521 | for (e = app->a_list.next; e != &ip_vs_app_list; e = e->next) { | 527 | for (e = app->a_list.next; e != &ipvs->app_list; e = e->next) { |
522 | app = list_entry(e, struct ip_vs_app, a_list); | 528 | app = list_entry(e, struct ip_vs_app, a_list); |
523 | list_for_each_entry(inc, &app->incs_list, a_list) { | 529 | list_for_each_entry(inc, &app->incs_list, a_list) { |
524 | return inc; | 530 | return inc; |
@@ -529,7 +535,9 @@ static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
529 | 535 | ||
530 | static void ip_vs_app_seq_stop(struct seq_file *seq, void *v) | 536 | static void ip_vs_app_seq_stop(struct seq_file *seq, void *v) |
531 | { | 537 | { |
532 | mutex_unlock(&__ip_vs_app_mutex); | 538 | struct netns_ipvs *ipvs = net_ipvs(seq_file_net(seq)); |
539 | |||
540 | mutex_unlock(&ipvs->app_mutex); | ||
533 | } | 541 | } |
534 | 542 | ||
535 | static int ip_vs_app_seq_show(struct seq_file *seq, void *v) | 543 | static int ip_vs_app_seq_show(struct seq_file *seq, void *v) |
@@ -557,7 +565,8 @@ static const struct seq_operations ip_vs_app_seq_ops = { | |||
557 | 565 | ||
558 | static int ip_vs_app_open(struct inode *inode, struct file *file) | 566 | static int ip_vs_app_open(struct inode *inode, struct file *file) |
559 | { | 567 | { |
560 | return seq_open(file, &ip_vs_app_seq_ops); | 568 | return seq_open_net(inode, file, &ip_vs_app_seq_ops, |
569 | sizeof(struct seq_net_private)); | ||
561 | } | 570 | } |
562 | 571 | ||
563 | static const struct file_operations ip_vs_app_fops = { | 572 | static const struct file_operations ip_vs_app_fops = { |
@@ -569,15 +578,36 @@ static const struct file_operations ip_vs_app_fops = { | |||
569 | }; | 578 | }; |
570 | #endif | 579 | #endif |
571 | 580 | ||
572 | int __init ip_vs_app_init(void) | 581 | static int __net_init __ip_vs_app_init(struct net *net) |
573 | { | 582 | { |
574 | /* we will replace it with proc_net_ipvs_create() soon */ | 583 | struct netns_ipvs *ipvs = net_ipvs(net); |
575 | proc_net_fops_create(&init_net, "ip_vs_app", 0, &ip_vs_app_fops); | 584 | |
585 | INIT_LIST_HEAD(&ipvs->app_list); | ||
586 | __mutex_init(&ipvs->app_mutex, "ipvs->app_mutex", &ipvs->app_key); | ||
587 | proc_net_fops_create(net, "ip_vs_app", 0, &ip_vs_app_fops); | ||
576 | return 0; | 588 | return 0; |
577 | } | 589 | } |
578 | 590 | ||
591 | static void __net_exit __ip_vs_app_cleanup(struct net *net) | ||
592 | { | ||
593 | proc_net_remove(net, "ip_vs_app"); | ||
594 | } | ||
595 | |||
596 | static struct pernet_operations ip_vs_app_ops = { | ||
597 | .init = __ip_vs_app_init, | ||
598 | .exit = __ip_vs_app_cleanup, | ||
599 | }; | ||
600 | |||
601 | int __init ip_vs_app_init(void) | ||
602 | { | ||
603 | int rv; | ||
604 | |||
605 | rv = register_pernet_subsys(&ip_vs_app_ops); | ||
606 | return rv; | ||
607 | } | ||
608 | |||
579 | 609 | ||
580 | void ip_vs_app_cleanup(void) | 610 | void ip_vs_app_cleanup(void) |
581 | { | 611 | { |
582 | proc_net_remove(&init_net, "ip_vs_app"); | 612 | unregister_pernet_subsys(&ip_vs_app_ops); |
583 | } | 613 | } |
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index e9adecdc8ca4..83233fe24a08 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c | |||
@@ -48,35 +48,32 @@ | |||
48 | /* | 48 | /* |
49 | * Connection hash size. Default is what was selected at compile time. | 49 | * Connection hash size. Default is what was selected at compile time. |
50 | */ | 50 | */ |
51 | int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS; | 51 | static int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS; |
52 | module_param_named(conn_tab_bits, ip_vs_conn_tab_bits, int, 0444); | 52 | module_param_named(conn_tab_bits, ip_vs_conn_tab_bits, int, 0444); |
53 | MODULE_PARM_DESC(conn_tab_bits, "Set connections' hash size"); | 53 | MODULE_PARM_DESC(conn_tab_bits, "Set connections' hash size"); |
54 | 54 | ||
55 | /* size and mask values */ | 55 | /* size and mask values */ |
56 | int ip_vs_conn_tab_size; | 56 | int ip_vs_conn_tab_size __read_mostly; |
57 | int ip_vs_conn_tab_mask; | 57 | static int ip_vs_conn_tab_mask __read_mostly; |
58 | 58 | ||
59 | /* | 59 | /* |
60 | * Connection hash table: for input and output packets lookups of IPVS | 60 | * Connection hash table: for input and output packets lookups of IPVS |
61 | */ | 61 | */ |
62 | static struct list_head *ip_vs_conn_tab; | 62 | static struct list_head *ip_vs_conn_tab __read_mostly; |
63 | 63 | ||
64 | /* SLAB cache for IPVS connections */ | 64 | /* SLAB cache for IPVS connections */ |
65 | static struct kmem_cache *ip_vs_conn_cachep __read_mostly; | 65 | static struct kmem_cache *ip_vs_conn_cachep __read_mostly; |
66 | 66 | ||
67 | /* counter for current IPVS connections */ | ||
68 | static atomic_t ip_vs_conn_count = ATOMIC_INIT(0); | ||
69 | |||
70 | /* counter for no client port connections */ | 67 | /* counter for no client port connections */ |
71 | static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0); | 68 | static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0); |
72 | 69 | ||
73 | /* random value for IPVS connection hash */ | 70 | /* random value for IPVS connection hash */ |
74 | static unsigned int ip_vs_conn_rnd; | 71 | static unsigned int ip_vs_conn_rnd __read_mostly; |
75 | 72 | ||
76 | /* | 73 | /* |
77 | * Fine locking granularity for big connection hash table | 74 | * Fine locking granularity for big connection hash table |
78 | */ | 75 | */ |
79 | #define CT_LOCKARRAY_BITS 4 | 76 | #define CT_LOCKARRAY_BITS 5 |
80 | #define CT_LOCKARRAY_SIZE (1<<CT_LOCKARRAY_BITS) | 77 | #define CT_LOCKARRAY_SIZE (1<<CT_LOCKARRAY_BITS) |
81 | #define CT_LOCKARRAY_MASK (CT_LOCKARRAY_SIZE-1) | 78 | #define CT_LOCKARRAY_MASK (CT_LOCKARRAY_SIZE-1) |
82 | 79 | ||
@@ -133,19 +130,19 @@ static inline void ct_write_unlock_bh(unsigned key) | |||
133 | /* | 130 | /* |
134 | * Returns hash value for IPVS connection entry | 131 | * Returns hash value for IPVS connection entry |
135 | */ | 132 | */ |
136 | static unsigned int ip_vs_conn_hashkey(int af, unsigned proto, | 133 | static unsigned int ip_vs_conn_hashkey(struct net *net, int af, unsigned proto, |
137 | const union nf_inet_addr *addr, | 134 | const union nf_inet_addr *addr, |
138 | __be16 port) | 135 | __be16 port) |
139 | { | 136 | { |
140 | #ifdef CONFIG_IP_VS_IPV6 | 137 | #ifdef CONFIG_IP_VS_IPV6 |
141 | if (af == AF_INET6) | 138 | if (af == AF_INET6) |
142 | return jhash_3words(jhash(addr, 16, ip_vs_conn_rnd), | 139 | return (jhash_3words(jhash(addr, 16, ip_vs_conn_rnd), |
143 | (__force u32)port, proto, ip_vs_conn_rnd) | 140 | (__force u32)port, proto, ip_vs_conn_rnd) ^ |
144 | & ip_vs_conn_tab_mask; | 141 | ((size_t)net>>8)) & ip_vs_conn_tab_mask; |
145 | #endif | 142 | #endif |
146 | return jhash_3words((__force u32)addr->ip, (__force u32)port, proto, | 143 | return (jhash_3words((__force u32)addr->ip, (__force u32)port, proto, |
147 | ip_vs_conn_rnd) | 144 | ip_vs_conn_rnd) ^ |
148 | & ip_vs_conn_tab_mask; | 145 | ((size_t)net>>8)) & ip_vs_conn_tab_mask; |
149 | } | 146 | } |
150 | 147 | ||
151 | static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p, | 148 | static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p, |
@@ -166,18 +163,18 @@ static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p, | |||
166 | port = p->vport; | 163 | port = p->vport; |
167 | } | 164 | } |
168 | 165 | ||
169 | return ip_vs_conn_hashkey(p->af, p->protocol, addr, port); | 166 | return ip_vs_conn_hashkey(p->net, p->af, p->protocol, addr, port); |
170 | } | 167 | } |
171 | 168 | ||
172 | static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp) | 169 | static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp) |
173 | { | 170 | { |
174 | struct ip_vs_conn_param p; | 171 | struct ip_vs_conn_param p; |
175 | 172 | ||
176 | ip_vs_conn_fill_param(cp->af, cp->protocol, &cp->caddr, cp->cport, | 173 | ip_vs_conn_fill_param(ip_vs_conn_net(cp), cp->af, cp->protocol, |
177 | NULL, 0, &p); | 174 | &cp->caddr, cp->cport, NULL, 0, &p); |
178 | 175 | ||
179 | if (cp->dest && cp->dest->svc->pe) { | 176 | if (cp->pe) { |
180 | p.pe = cp->dest->svc->pe; | 177 | p.pe = cp->pe; |
181 | p.pe_data = cp->pe_data; | 178 | p.pe_data = cp->pe_data; |
182 | p.pe_data_len = cp->pe_data_len; | 179 | p.pe_data_len = cp->pe_data_len; |
183 | } | 180 | } |
@@ -186,7 +183,7 @@ static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp) | |||
186 | } | 183 | } |
187 | 184 | ||
188 | /* | 185 | /* |
189 | * Hashes ip_vs_conn in ip_vs_conn_tab by proto,addr,port. | 186 | * Hashes ip_vs_conn in ip_vs_conn_tab by netns,proto,addr,port. |
190 | * returns bool success. | 187 | * returns bool success. |
191 | */ | 188 | */ |
192 | static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) | 189 | static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) |
@@ -269,11 +266,12 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p) | |||
269 | 266 | ||
270 | list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { | 267 | list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { |
271 | if (cp->af == p->af && | 268 | if (cp->af == p->af && |
269 | p->cport == cp->cport && p->vport == cp->vport && | ||
272 | ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) && | 270 | ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) && |
273 | ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) && | 271 | ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) && |
274 | p->cport == cp->cport && p->vport == cp->vport && | ||
275 | ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && | 272 | ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && |
276 | p->protocol == cp->protocol) { | 273 | p->protocol == cp->protocol && |
274 | ip_vs_conn_net_eq(cp, p->net)) { | ||
277 | /* HIT */ | 275 | /* HIT */ |
278 | atomic_inc(&cp->refcnt); | 276 | atomic_inc(&cp->refcnt); |
279 | ct_read_unlock(hash); | 277 | ct_read_unlock(hash); |
@@ -313,23 +311,23 @@ ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb, | |||
313 | struct ip_vs_conn_param *p) | 311 | struct ip_vs_conn_param *p) |
314 | { | 312 | { |
315 | __be16 _ports[2], *pptr; | 313 | __be16 _ports[2], *pptr; |
314 | struct net *net = skb_net(skb); | ||
316 | 315 | ||
317 | pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); | 316 | pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); |
318 | if (pptr == NULL) | 317 | if (pptr == NULL) |
319 | return 1; | 318 | return 1; |
320 | 319 | ||
321 | if (likely(!inverse)) | 320 | if (likely(!inverse)) |
322 | ip_vs_conn_fill_param(af, iph->protocol, &iph->saddr, pptr[0], | 321 | ip_vs_conn_fill_param(net, af, iph->protocol, &iph->saddr, |
323 | &iph->daddr, pptr[1], p); | 322 | pptr[0], &iph->daddr, pptr[1], p); |
324 | else | 323 | else |
325 | ip_vs_conn_fill_param(af, iph->protocol, &iph->daddr, pptr[1], | 324 | ip_vs_conn_fill_param(net, af, iph->protocol, &iph->daddr, |
326 | &iph->saddr, pptr[0], p); | 325 | pptr[1], &iph->saddr, pptr[0], p); |
327 | return 0; | 326 | return 0; |
328 | } | 327 | } |
329 | 328 | ||
330 | struct ip_vs_conn * | 329 | struct ip_vs_conn * |
331 | ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, | 330 | ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, |
332 | struct ip_vs_protocol *pp, | ||
333 | const struct ip_vs_iphdr *iph, | 331 | const struct ip_vs_iphdr *iph, |
334 | unsigned int proto_off, int inverse) | 332 | unsigned int proto_off, int inverse) |
335 | { | 333 | { |
@@ -353,8 +351,10 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) | |||
353 | ct_read_lock(hash); | 351 | ct_read_lock(hash); |
354 | 352 | ||
355 | list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { | 353 | list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { |
354 | if (!ip_vs_conn_net_eq(cp, p->net)) | ||
355 | continue; | ||
356 | if (p->pe_data && p->pe->ct_match) { | 356 | if (p->pe_data && p->pe->ct_match) { |
357 | if (p->pe->ct_match(p, cp)) | 357 | if (p->pe == cp->pe && p->pe->ct_match(p, cp)) |
358 | goto out; | 358 | goto out; |
359 | continue; | 359 | continue; |
360 | } | 360 | } |
@@ -404,10 +404,11 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) | |||
404 | 404 | ||
405 | list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { | 405 | list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { |
406 | if (cp->af == p->af && | 406 | if (cp->af == p->af && |
407 | p->vport == cp->cport && p->cport == cp->dport && | ||
407 | ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) && | 408 | ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) && |
408 | ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) && | 409 | ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) && |
409 | p->vport == cp->cport && p->cport == cp->dport && | 410 | p->protocol == cp->protocol && |
410 | p->protocol == cp->protocol) { | 411 | ip_vs_conn_net_eq(cp, p->net)) { |
411 | /* HIT */ | 412 | /* HIT */ |
412 | atomic_inc(&cp->refcnt); | 413 | atomic_inc(&cp->refcnt); |
413 | ret = cp; | 414 | ret = cp; |
@@ -428,7 +429,6 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) | |||
428 | 429 | ||
429 | struct ip_vs_conn * | 430 | struct ip_vs_conn * |
430 | ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, | 431 | ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, |
431 | struct ip_vs_protocol *pp, | ||
432 | const struct ip_vs_iphdr *iph, | 432 | const struct ip_vs_iphdr *iph, |
433 | unsigned int proto_off, int inverse) | 433 | unsigned int proto_off, int inverse) |
434 | { | 434 | { |
@@ -611,9 +611,9 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp) | |||
611 | struct ip_vs_dest *dest; | 611 | struct ip_vs_dest *dest; |
612 | 612 | ||
613 | if ((cp) && (!cp->dest)) { | 613 | if ((cp) && (!cp->dest)) { |
614 | dest = ip_vs_find_dest(cp->af, &cp->daddr, cp->dport, | 614 | dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr, |
615 | &cp->vaddr, cp->vport, | 615 | cp->dport, &cp->vaddr, cp->vport, |
616 | cp->protocol); | 616 | cp->protocol, cp->fwmark); |
617 | ip_vs_bind_dest(cp, dest); | 617 | ip_vs_bind_dest(cp, dest); |
618 | return dest; | 618 | return dest; |
619 | } else | 619 | } else |
@@ -686,13 +686,14 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp) | |||
686 | int ip_vs_check_template(struct ip_vs_conn *ct) | 686 | int ip_vs_check_template(struct ip_vs_conn *ct) |
687 | { | 687 | { |
688 | struct ip_vs_dest *dest = ct->dest; | 688 | struct ip_vs_dest *dest = ct->dest; |
689 | struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(ct)); | ||
689 | 690 | ||
690 | /* | 691 | /* |
691 | * Checking the dest server status. | 692 | * Checking the dest server status. |
692 | */ | 693 | */ |
693 | if ((dest == NULL) || | 694 | if ((dest == NULL) || |
694 | !(dest->flags & IP_VS_DEST_F_AVAILABLE) || | 695 | !(dest->flags & IP_VS_DEST_F_AVAILABLE) || |
695 | (sysctl_ip_vs_expire_quiescent_template && | 696 | (ipvs->sysctl_expire_quiescent_template && |
696 | (atomic_read(&dest->weight) == 0))) { | 697 | (atomic_read(&dest->weight) == 0))) { |
697 | IP_VS_DBG_BUF(9, "check_template: dest not available for " | 698 | IP_VS_DBG_BUF(9, "check_template: dest not available for " |
698 | "protocol %s s:%s:%d v:%s:%d " | 699 | "protocol %s s:%s:%d v:%s:%d " |
@@ -730,6 +731,7 @@ int ip_vs_check_template(struct ip_vs_conn *ct) | |||
730 | static void ip_vs_conn_expire(unsigned long data) | 731 | static void ip_vs_conn_expire(unsigned long data) |
731 | { | 732 | { |
732 | struct ip_vs_conn *cp = (struct ip_vs_conn *)data; | 733 | struct ip_vs_conn *cp = (struct ip_vs_conn *)data; |
734 | struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); | ||
733 | 735 | ||
734 | cp->timeout = 60*HZ; | 736 | cp->timeout = 60*HZ; |
735 | 737 | ||
@@ -765,13 +767,14 @@ static void ip_vs_conn_expire(unsigned long data) | |||
765 | if (cp->flags & IP_VS_CONN_F_NFCT) | 767 | if (cp->flags & IP_VS_CONN_F_NFCT) |
766 | ip_vs_conn_drop_conntrack(cp); | 768 | ip_vs_conn_drop_conntrack(cp); |
767 | 769 | ||
770 | ip_vs_pe_put(cp->pe); | ||
768 | kfree(cp->pe_data); | 771 | kfree(cp->pe_data); |
769 | if (unlikely(cp->app != NULL)) | 772 | if (unlikely(cp->app != NULL)) |
770 | ip_vs_unbind_app(cp); | 773 | ip_vs_unbind_app(cp); |
771 | ip_vs_unbind_dest(cp); | 774 | ip_vs_unbind_dest(cp); |
772 | if (cp->flags & IP_VS_CONN_F_NO_CPORT) | 775 | if (cp->flags & IP_VS_CONN_F_NO_CPORT) |
773 | atomic_dec(&ip_vs_conn_no_cport_cnt); | 776 | atomic_dec(&ip_vs_conn_no_cport_cnt); |
774 | atomic_dec(&ip_vs_conn_count); | 777 | atomic_dec(&ipvs->conn_count); |
775 | 778 | ||
776 | kmem_cache_free(ip_vs_conn_cachep, cp); | 779 | kmem_cache_free(ip_vs_conn_cachep, cp); |
777 | return; | 780 | return; |
@@ -802,10 +805,12 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp) | |||
802 | struct ip_vs_conn * | 805 | struct ip_vs_conn * |
803 | ip_vs_conn_new(const struct ip_vs_conn_param *p, | 806 | ip_vs_conn_new(const struct ip_vs_conn_param *p, |
804 | const union nf_inet_addr *daddr, __be16 dport, unsigned flags, | 807 | const union nf_inet_addr *daddr, __be16 dport, unsigned flags, |
805 | struct ip_vs_dest *dest) | 808 | struct ip_vs_dest *dest, __u32 fwmark) |
806 | { | 809 | { |
807 | struct ip_vs_conn *cp; | 810 | struct ip_vs_conn *cp; |
808 | struct ip_vs_protocol *pp = ip_vs_proto_get(p->protocol); | 811 | struct netns_ipvs *ipvs = net_ipvs(p->net); |
812 | struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->net, | ||
813 | p->protocol); | ||
809 | 814 | ||
810 | cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC); | 815 | cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC); |
811 | if (cp == NULL) { | 816 | if (cp == NULL) { |
@@ -815,6 +820,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, | |||
815 | 820 | ||
816 | INIT_LIST_HEAD(&cp->c_list); | 821 | INIT_LIST_HEAD(&cp->c_list); |
817 | setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp); | 822 | setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp); |
823 | ip_vs_conn_net_set(cp, p->net); | ||
818 | cp->af = p->af; | 824 | cp->af = p->af; |
819 | cp->protocol = p->protocol; | 825 | cp->protocol = p->protocol; |
820 | ip_vs_addr_copy(p->af, &cp->caddr, p->caddr); | 826 | ip_vs_addr_copy(p->af, &cp->caddr, p->caddr); |
@@ -826,7 +832,10 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, | |||
826 | &cp->daddr, daddr); | 832 | &cp->daddr, daddr); |
827 | cp->dport = dport; | 833 | cp->dport = dport; |
828 | cp->flags = flags; | 834 | cp->flags = flags; |
829 | if (flags & IP_VS_CONN_F_TEMPLATE && p->pe_data) { | 835 | cp->fwmark = fwmark; |
836 | if (flags & IP_VS_CONN_F_TEMPLATE && p->pe) { | ||
837 | ip_vs_pe_get(p->pe); | ||
838 | cp->pe = p->pe; | ||
830 | cp->pe_data = p->pe_data; | 839 | cp->pe_data = p->pe_data; |
831 | cp->pe_data_len = p->pe_data_len; | 840 | cp->pe_data_len = p->pe_data_len; |
832 | } | 841 | } |
@@ -842,7 +851,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, | |||
842 | atomic_set(&cp->n_control, 0); | 851 | atomic_set(&cp->n_control, 0); |
843 | atomic_set(&cp->in_pkts, 0); | 852 | atomic_set(&cp->in_pkts, 0); |
844 | 853 | ||
845 | atomic_inc(&ip_vs_conn_count); | 854 | atomic_inc(&ipvs->conn_count); |
846 | if (flags & IP_VS_CONN_F_NO_CPORT) | 855 | if (flags & IP_VS_CONN_F_NO_CPORT) |
847 | atomic_inc(&ip_vs_conn_no_cport_cnt); | 856 | atomic_inc(&ip_vs_conn_no_cport_cnt); |
848 | 857 | ||
@@ -861,8 +870,8 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, | |||
861 | #endif | 870 | #endif |
862 | ip_vs_bind_xmit(cp); | 871 | ip_vs_bind_xmit(cp); |
863 | 872 | ||
864 | if (unlikely(pp && atomic_read(&pp->appcnt))) | 873 | if (unlikely(pd && atomic_read(&pd->appcnt))) |
865 | ip_vs_bind_app(cp, pp); | 874 | ip_vs_bind_app(cp, pd->pp); |
866 | 875 | ||
867 | /* | 876 | /* |
868 | * Allow conntrack to be preserved. By default, conntrack | 877 | * Allow conntrack to be preserved. By default, conntrack |
@@ -871,7 +880,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, | |||
871 | * IP_VS_CONN_F_ONE_PACKET too. | 880 | * IP_VS_CONN_F_ONE_PACKET too. |
872 | */ | 881 | */ |
873 | 882 | ||
874 | if (ip_vs_conntrack_enabled()) | 883 | if (ip_vs_conntrack_enabled(ipvs)) |
875 | cp->flags |= IP_VS_CONN_F_NFCT; | 884 | cp->flags |= IP_VS_CONN_F_NFCT; |
876 | 885 | ||
877 | /* Hash it in the ip_vs_conn_tab finally */ | 886 | /* Hash it in the ip_vs_conn_tab finally */ |
@@ -884,17 +893,22 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, | |||
884 | * /proc/net/ip_vs_conn entries | 893 | * /proc/net/ip_vs_conn entries |
885 | */ | 894 | */ |
886 | #ifdef CONFIG_PROC_FS | 895 | #ifdef CONFIG_PROC_FS |
896 | struct ip_vs_iter_state { | ||
897 | struct seq_net_private p; | ||
898 | struct list_head *l; | ||
899 | }; | ||
887 | 900 | ||
888 | static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos) | 901 | static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos) |
889 | { | 902 | { |
890 | int idx; | 903 | int idx; |
891 | struct ip_vs_conn *cp; | 904 | struct ip_vs_conn *cp; |
905 | struct ip_vs_iter_state *iter = seq->private; | ||
892 | 906 | ||
893 | for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { | 907 | for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { |
894 | ct_read_lock_bh(idx); | 908 | ct_read_lock_bh(idx); |
895 | list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { | 909 | list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { |
896 | if (pos-- == 0) { | 910 | if (pos-- == 0) { |
897 | seq->private = &ip_vs_conn_tab[idx]; | 911 | iter->l = &ip_vs_conn_tab[idx]; |
898 | return cp; | 912 | return cp; |
899 | } | 913 | } |
900 | } | 914 | } |
@@ -906,14 +920,17 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos) | |||
906 | 920 | ||
907 | static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos) | 921 | static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos) |
908 | { | 922 | { |
909 | seq->private = NULL; | 923 | struct ip_vs_iter_state *iter = seq->private; |
924 | |||
925 | iter->l = NULL; | ||
910 | return *pos ? ip_vs_conn_array(seq, *pos - 1) :SEQ_START_TOKEN; | 926 | return *pos ? ip_vs_conn_array(seq, *pos - 1) :SEQ_START_TOKEN; |
911 | } | 927 | } |
912 | 928 | ||
913 | static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 929 | static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
914 | { | 930 | { |
915 | struct ip_vs_conn *cp = v; | 931 | struct ip_vs_conn *cp = v; |
916 | struct list_head *e, *l = seq->private; | 932 | struct ip_vs_iter_state *iter = seq->private; |
933 | struct list_head *e, *l = iter->l; | ||
917 | int idx; | 934 | int idx; |
918 | 935 | ||
919 | ++*pos; | 936 | ++*pos; |
@@ -930,18 +947,19 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
930 | while (++idx < ip_vs_conn_tab_size) { | 947 | while (++idx < ip_vs_conn_tab_size) { |
931 | ct_read_lock_bh(idx); | 948 | ct_read_lock_bh(idx); |
932 | list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { | 949 | list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { |
933 | seq->private = &ip_vs_conn_tab[idx]; | 950 | iter->l = &ip_vs_conn_tab[idx]; |
934 | return cp; | 951 | return cp; |
935 | } | 952 | } |
936 | ct_read_unlock_bh(idx); | 953 | ct_read_unlock_bh(idx); |
937 | } | 954 | } |
938 | seq->private = NULL; | 955 | iter->l = NULL; |
939 | return NULL; | 956 | return NULL; |
940 | } | 957 | } |
941 | 958 | ||
942 | static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v) | 959 | static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v) |
943 | { | 960 | { |
944 | struct list_head *l = seq->private; | 961 | struct ip_vs_iter_state *iter = seq->private; |
962 | struct list_head *l = iter->l; | ||
945 | 963 | ||
946 | if (l) | 964 | if (l) |
947 | ct_read_unlock_bh(l - ip_vs_conn_tab); | 965 | ct_read_unlock_bh(l - ip_vs_conn_tab); |
@@ -955,18 +973,19 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v) | |||
955 | "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Expires PEName PEData\n"); | 973 | "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Expires PEName PEData\n"); |
956 | else { | 974 | else { |
957 | const struct ip_vs_conn *cp = v; | 975 | const struct ip_vs_conn *cp = v; |
976 | struct net *net = seq_file_net(seq); | ||
958 | char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3]; | 977 | char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3]; |
959 | size_t len = 0; | 978 | size_t len = 0; |
960 | 979 | ||
961 | if (cp->dest && cp->pe_data && | 980 | if (!ip_vs_conn_net_eq(cp, net)) |
962 | cp->dest->svc->pe->show_pe_data) { | 981 | return 0; |
982 | if (cp->pe_data) { | ||
963 | pe_data[0] = ' '; | 983 | pe_data[0] = ' '; |
964 | len = strlen(cp->dest->svc->pe->name); | 984 | len = strlen(cp->pe->name); |
965 | memcpy(pe_data + 1, cp->dest->svc->pe->name, len); | 985 | memcpy(pe_data + 1, cp->pe->name, len); |
966 | pe_data[len + 1] = ' '; | 986 | pe_data[len + 1] = ' '; |
967 | len += 2; | 987 | len += 2; |
968 | len += cp->dest->svc->pe->show_pe_data(cp, | 988 | len += cp->pe->show_pe_data(cp, pe_data + len); |
969 | pe_data + len); | ||
970 | } | 989 | } |
971 | pe_data[len] = '\0'; | 990 | pe_data[len] = '\0'; |
972 | 991 | ||
@@ -1004,7 +1023,8 @@ static const struct seq_operations ip_vs_conn_seq_ops = { | |||
1004 | 1023 | ||
1005 | static int ip_vs_conn_open(struct inode *inode, struct file *file) | 1024 | static int ip_vs_conn_open(struct inode *inode, struct file *file) |
1006 | { | 1025 | { |
1007 | return seq_open(file, &ip_vs_conn_seq_ops); | 1026 | return seq_open_net(inode, file, &ip_vs_conn_seq_ops, |
1027 | sizeof(struct ip_vs_iter_state)); | ||
1008 | } | 1028 | } |
1009 | 1029 | ||
1010 | static const struct file_operations ip_vs_conn_fops = { | 1030 | static const struct file_operations ip_vs_conn_fops = { |
@@ -1031,6 +1051,10 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v) | |||
1031 | "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Origin Expires\n"); | 1051 | "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Origin Expires\n"); |
1032 | else { | 1052 | else { |
1033 | const struct ip_vs_conn *cp = v; | 1053 | const struct ip_vs_conn *cp = v; |
1054 | struct net *net = seq_file_net(seq); | ||
1055 | |||
1056 | if (!ip_vs_conn_net_eq(cp, net)) | ||
1057 | return 0; | ||
1034 | 1058 | ||
1035 | #ifdef CONFIG_IP_VS_IPV6 | 1059 | #ifdef CONFIG_IP_VS_IPV6 |
1036 | if (cp->af == AF_INET6) | 1060 | if (cp->af == AF_INET6) |
@@ -1067,7 +1091,8 @@ static const struct seq_operations ip_vs_conn_sync_seq_ops = { | |||
1067 | 1091 | ||
1068 | static int ip_vs_conn_sync_open(struct inode *inode, struct file *file) | 1092 | static int ip_vs_conn_sync_open(struct inode *inode, struct file *file) |
1069 | { | 1093 | { |
1070 | return seq_open(file, &ip_vs_conn_sync_seq_ops); | 1094 | return seq_open_net(inode, file, &ip_vs_conn_sync_seq_ops, |
1095 | sizeof(struct ip_vs_iter_state)); | ||
1071 | } | 1096 | } |
1072 | 1097 | ||
1073 | static const struct file_operations ip_vs_conn_sync_fops = { | 1098 | static const struct file_operations ip_vs_conn_sync_fops = { |
@@ -1113,7 +1138,7 @@ static inline int todrop_entry(struct ip_vs_conn *cp) | |||
1113 | } | 1138 | } |
1114 | 1139 | ||
1115 | /* Called from keventd and must protect itself from softirqs */ | 1140 | /* Called from keventd and must protect itself from softirqs */ |
1116 | void ip_vs_random_dropentry(void) | 1141 | void ip_vs_random_dropentry(struct net *net) |
1117 | { | 1142 | { |
1118 | int idx; | 1143 | int idx; |
1119 | struct ip_vs_conn *cp; | 1144 | struct ip_vs_conn *cp; |
@@ -1133,7 +1158,8 @@ void ip_vs_random_dropentry(void) | |||
1133 | if (cp->flags & IP_VS_CONN_F_TEMPLATE) | 1158 | if (cp->flags & IP_VS_CONN_F_TEMPLATE) |
1134 | /* connection template */ | 1159 | /* connection template */ |
1135 | continue; | 1160 | continue; |
1136 | 1161 | if (!ip_vs_conn_net_eq(cp, net)) | |
1162 | continue; | ||
1137 | if (cp->protocol == IPPROTO_TCP) { | 1163 | if (cp->protocol == IPPROTO_TCP) { |
1138 | switch(cp->state) { | 1164 | switch(cp->state) { |
1139 | case IP_VS_TCP_S_SYN_RECV: | 1165 | case IP_VS_TCP_S_SYN_RECV: |
@@ -1168,12 +1194,13 @@ void ip_vs_random_dropentry(void) | |||
1168 | /* | 1194 | /* |
1169 | * Flush all the connection entries in the ip_vs_conn_tab | 1195 | * Flush all the connection entries in the ip_vs_conn_tab |
1170 | */ | 1196 | */ |
1171 | static void ip_vs_conn_flush(void) | 1197 | static void ip_vs_conn_flush(struct net *net) |
1172 | { | 1198 | { |
1173 | int idx; | 1199 | int idx; |
1174 | struct ip_vs_conn *cp; | 1200 | struct ip_vs_conn *cp; |
1201 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
1175 | 1202 | ||
1176 | flush_again: | 1203 | flush_again: |
1177 | for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { | 1204 | for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { |
1178 | /* | 1205 | /* |
1179 | * Lock is actually needed in this loop. | 1206 | * Lock is actually needed in this loop. |
@@ -1181,7 +1208,8 @@ static void ip_vs_conn_flush(void) | |||
1181 | ct_write_lock_bh(idx); | 1208 | ct_write_lock_bh(idx); |
1182 | 1209 | ||
1183 | list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { | 1210 | list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { |
1184 | 1211 | if (!ip_vs_conn_net_eq(cp, net)) | |
1212 | continue; | ||
1185 | IP_VS_DBG(4, "del connection\n"); | 1213 | IP_VS_DBG(4, "del connection\n"); |
1186 | ip_vs_conn_expire_now(cp); | 1214 | ip_vs_conn_expire_now(cp); |
1187 | if (cp->control) { | 1215 | if (cp->control) { |
@@ -1194,16 +1222,41 @@ static void ip_vs_conn_flush(void) | |||
1194 | 1222 | ||
1195 | /* the counter may be not NULL, because maybe some conn entries | 1223 | /* the counter may be not NULL, because maybe some conn entries |
1196 | are run by slow timer handler or unhashed but still referred */ | 1224 | are run by slow timer handler or unhashed but still referred */ |
1197 | if (atomic_read(&ip_vs_conn_count) != 0) { | 1225 | if (atomic_read(&ipvs->conn_count) != 0) { |
1198 | schedule(); | 1226 | schedule(); |
1199 | goto flush_again; | 1227 | goto flush_again; |
1200 | } | 1228 | } |
1201 | } | 1229 | } |
1230 | /* | ||
1231 | * per netns init and exit | ||
1232 | */ | ||
1233 | int __net_init __ip_vs_conn_init(struct net *net) | ||
1234 | { | ||
1235 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
1236 | |||
1237 | atomic_set(&ipvs->conn_count, 0); | ||
1238 | |||
1239 | proc_net_fops_create(net, "ip_vs_conn", 0, &ip_vs_conn_fops); | ||
1240 | proc_net_fops_create(net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops); | ||
1241 | return 0; | ||
1242 | } | ||
1202 | 1243 | ||
1244 | static void __net_exit __ip_vs_conn_cleanup(struct net *net) | ||
1245 | { | ||
1246 | /* flush all the connection entries first */ | ||
1247 | ip_vs_conn_flush(net); | ||
1248 | proc_net_remove(net, "ip_vs_conn"); | ||
1249 | proc_net_remove(net, "ip_vs_conn_sync"); | ||
1250 | } | ||
1251 | static struct pernet_operations ipvs_conn_ops = { | ||
1252 | .init = __ip_vs_conn_init, | ||
1253 | .exit = __ip_vs_conn_cleanup, | ||
1254 | }; | ||
1203 | 1255 | ||
1204 | int __init ip_vs_conn_init(void) | 1256 | int __init ip_vs_conn_init(void) |
1205 | { | 1257 | { |
1206 | int idx; | 1258 | int idx; |
1259 | int retc; | ||
1207 | 1260 | ||
1208 | /* Compute size and mask */ | 1261 | /* Compute size and mask */ |
1209 | ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits; | 1262 | ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits; |
@@ -1241,24 +1294,18 @@ int __init ip_vs_conn_init(void) | |||
1241 | rwlock_init(&__ip_vs_conntbl_lock_array[idx].l); | 1294 | rwlock_init(&__ip_vs_conntbl_lock_array[idx].l); |
1242 | } | 1295 | } |
1243 | 1296 | ||
1244 | proc_net_fops_create(&init_net, "ip_vs_conn", 0, &ip_vs_conn_fops); | 1297 | retc = register_pernet_subsys(&ipvs_conn_ops); |
1245 | proc_net_fops_create(&init_net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops); | ||
1246 | 1298 | ||
1247 | /* calculate the random value for connection hash */ | 1299 | /* calculate the random value for connection hash */ |
1248 | get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd)); | 1300 | get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd)); |
1249 | 1301 | ||
1250 | return 0; | 1302 | return retc; |
1251 | } | 1303 | } |
1252 | 1304 | ||
1253 | |||
1254 | void ip_vs_conn_cleanup(void) | 1305 | void ip_vs_conn_cleanup(void) |
1255 | { | 1306 | { |
1256 | /* flush all the connection entries first */ | 1307 | unregister_pernet_subsys(&ipvs_conn_ops); |
1257 | ip_vs_conn_flush(); | ||
1258 | |||
1259 | /* Release the empty cache */ | 1308 | /* Release the empty cache */ |
1260 | kmem_cache_destroy(ip_vs_conn_cachep); | 1309 | kmem_cache_destroy(ip_vs_conn_cachep); |
1261 | proc_net_remove(&init_net, "ip_vs_conn"); | ||
1262 | proc_net_remove(&init_net, "ip_vs_conn_sync"); | ||
1263 | vfree(ip_vs_conn_tab); | 1310 | vfree(ip_vs_conn_tab); |
1264 | } | 1311 | } |
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index b4e51e9c5a04..f36a84f33efb 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c | |||
@@ -41,6 +41,7 @@ | |||
41 | #include <net/icmp.h> /* for icmp_send */ | 41 | #include <net/icmp.h> /* for icmp_send */ |
42 | #include <net/route.h> | 42 | #include <net/route.h> |
43 | #include <net/ip6_checksum.h> | 43 | #include <net/ip6_checksum.h> |
44 | #include <net/netns/generic.h> /* net_generic() */ | ||
44 | 45 | ||
45 | #include <linux/netfilter.h> | 46 | #include <linux/netfilter.h> |
46 | #include <linux/netfilter_ipv4.h> | 47 | #include <linux/netfilter_ipv4.h> |
@@ -68,6 +69,12 @@ EXPORT_SYMBOL(ip_vs_conn_put); | |||
68 | EXPORT_SYMBOL(ip_vs_get_debug_level); | 69 | EXPORT_SYMBOL(ip_vs_get_debug_level); |
69 | #endif | 70 | #endif |
70 | 71 | ||
72 | int ip_vs_net_id __read_mostly; | ||
73 | #ifdef IP_VS_GENERIC_NETNS | ||
74 | EXPORT_SYMBOL(ip_vs_net_id); | ||
75 | #endif | ||
76 | /* netns cnt used for uniqueness */ | ||
77 | static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0); | ||
71 | 78 | ||
72 | /* ID used in ICMP lookups */ | 79 | /* ID used in ICMP lookups */ |
73 | #define icmp_id(icmph) (((icmph)->un).echo.id) | 80 | #define icmp_id(icmph) (((icmph)->un).echo.id) |
@@ -108,21 +115,28 @@ static inline void | |||
108 | ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) | 115 | ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) |
109 | { | 116 | { |
110 | struct ip_vs_dest *dest = cp->dest; | 117 | struct ip_vs_dest *dest = cp->dest; |
118 | struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); | ||
119 | |||
111 | if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { | 120 | if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { |
112 | spin_lock(&dest->stats.lock); | 121 | struct ip_vs_cpu_stats *s; |
113 | dest->stats.ustats.inpkts++; | 122 | |
114 | dest->stats.ustats.inbytes += skb->len; | 123 | s = this_cpu_ptr(dest->stats.cpustats); |
115 | spin_unlock(&dest->stats.lock); | 124 | s->ustats.inpkts++; |
116 | 125 | u64_stats_update_begin(&s->syncp); | |
117 | spin_lock(&dest->svc->stats.lock); | 126 | s->ustats.inbytes += skb->len; |
118 | dest->svc->stats.ustats.inpkts++; | 127 | u64_stats_update_end(&s->syncp); |
119 | dest->svc->stats.ustats.inbytes += skb->len; | 128 | |
120 | spin_unlock(&dest->svc->stats.lock); | 129 | s = this_cpu_ptr(dest->svc->stats.cpustats); |
121 | 130 | s->ustats.inpkts++; | |
122 | spin_lock(&ip_vs_stats.lock); | 131 | u64_stats_update_begin(&s->syncp); |
123 | ip_vs_stats.ustats.inpkts++; | 132 | s->ustats.inbytes += skb->len; |
124 | ip_vs_stats.ustats.inbytes += skb->len; | 133 | u64_stats_update_end(&s->syncp); |
125 | spin_unlock(&ip_vs_stats.lock); | 134 | |
135 | s = this_cpu_ptr(ipvs->cpustats); | ||
136 | s->ustats.inpkts++; | ||
137 | u64_stats_update_begin(&s->syncp); | ||
138 | s->ustats.inbytes += skb->len; | ||
139 | u64_stats_update_end(&s->syncp); | ||
126 | } | 140 | } |
127 | } | 141 | } |
128 | 142 | ||
@@ -131,21 +145,28 @@ static inline void | |||
131 | ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) | 145 | ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) |
132 | { | 146 | { |
133 | struct ip_vs_dest *dest = cp->dest; | 147 | struct ip_vs_dest *dest = cp->dest; |
148 | struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); | ||
149 | |||
134 | if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { | 150 | if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { |
135 | spin_lock(&dest->stats.lock); | 151 | struct ip_vs_cpu_stats *s; |
136 | dest->stats.ustats.outpkts++; | 152 | |
137 | dest->stats.ustats.outbytes += skb->len; | 153 | s = this_cpu_ptr(dest->stats.cpustats); |
138 | spin_unlock(&dest->stats.lock); | 154 | s->ustats.outpkts++; |
139 | 155 | u64_stats_update_begin(&s->syncp); | |
140 | spin_lock(&dest->svc->stats.lock); | 156 | s->ustats.outbytes += skb->len; |
141 | dest->svc->stats.ustats.outpkts++; | 157 | u64_stats_update_end(&s->syncp); |
142 | dest->svc->stats.ustats.outbytes += skb->len; | 158 | |
143 | spin_unlock(&dest->svc->stats.lock); | 159 | s = this_cpu_ptr(dest->svc->stats.cpustats); |
144 | 160 | s->ustats.outpkts++; | |
145 | spin_lock(&ip_vs_stats.lock); | 161 | u64_stats_update_begin(&s->syncp); |
146 | ip_vs_stats.ustats.outpkts++; | 162 | s->ustats.outbytes += skb->len; |
147 | ip_vs_stats.ustats.outbytes += skb->len; | 163 | u64_stats_update_end(&s->syncp); |
148 | spin_unlock(&ip_vs_stats.lock); | 164 | |
165 | s = this_cpu_ptr(ipvs->cpustats); | ||
166 | s->ustats.outpkts++; | ||
167 | u64_stats_update_begin(&s->syncp); | ||
168 | s->ustats.outbytes += skb->len; | ||
169 | u64_stats_update_end(&s->syncp); | ||
149 | } | 170 | } |
150 | } | 171 | } |
151 | 172 | ||
@@ -153,41 +174,44 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) | |||
153 | static inline void | 174 | static inline void |
154 | ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc) | 175 | ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc) |
155 | { | 176 | { |
156 | spin_lock(&cp->dest->stats.lock); | 177 | struct netns_ipvs *ipvs = net_ipvs(svc->net); |
157 | cp->dest->stats.ustats.conns++; | 178 | struct ip_vs_cpu_stats *s; |
158 | spin_unlock(&cp->dest->stats.lock); | 179 | |
180 | s = this_cpu_ptr(cp->dest->stats.cpustats); | ||
181 | s->ustats.conns++; | ||
159 | 182 | ||
160 | spin_lock(&svc->stats.lock); | 183 | s = this_cpu_ptr(svc->stats.cpustats); |
161 | svc->stats.ustats.conns++; | 184 | s->ustats.conns++; |
162 | spin_unlock(&svc->stats.lock); | ||
163 | 185 | ||
164 | spin_lock(&ip_vs_stats.lock); | 186 | s = this_cpu_ptr(ipvs->cpustats); |
165 | ip_vs_stats.ustats.conns++; | 187 | s->ustats.conns++; |
166 | spin_unlock(&ip_vs_stats.lock); | ||
167 | } | 188 | } |
168 | 189 | ||
169 | 190 | ||
170 | static inline int | 191 | static inline int |
171 | ip_vs_set_state(struct ip_vs_conn *cp, int direction, | 192 | ip_vs_set_state(struct ip_vs_conn *cp, int direction, |
172 | const struct sk_buff *skb, | 193 | const struct sk_buff *skb, |
173 | struct ip_vs_protocol *pp) | 194 | struct ip_vs_proto_data *pd) |
174 | { | 195 | { |
175 | if (unlikely(!pp->state_transition)) | 196 | if (unlikely(!pd->pp->state_transition)) |
176 | return 0; | 197 | return 0; |
177 | return pp->state_transition(cp, direction, skb, pp); | 198 | return pd->pp->state_transition(cp, direction, skb, pd); |
178 | } | 199 | } |
179 | 200 | ||
180 | static inline void | 201 | static inline int |
181 | ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc, | 202 | ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc, |
182 | struct sk_buff *skb, int protocol, | 203 | struct sk_buff *skb, int protocol, |
183 | const union nf_inet_addr *caddr, __be16 cport, | 204 | const union nf_inet_addr *caddr, __be16 cport, |
184 | const union nf_inet_addr *vaddr, __be16 vport, | 205 | const union nf_inet_addr *vaddr, __be16 vport, |
185 | struct ip_vs_conn_param *p) | 206 | struct ip_vs_conn_param *p) |
186 | { | 207 | { |
187 | ip_vs_conn_fill_param(svc->af, protocol, caddr, cport, vaddr, vport, p); | 208 | ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr, |
209 | vport, p); | ||
188 | p->pe = svc->pe; | 210 | p->pe = svc->pe; |
189 | if (p->pe && p->pe->fill_param) | 211 | if (p->pe && p->pe->fill_param) |
190 | p->pe->fill_param(p, skb); | 212 | return p->pe->fill_param(p, skb); |
213 | |||
214 | return 0; | ||
191 | } | 215 | } |
192 | 216 | ||
193 | /* | 217 | /* |
@@ -200,7 +224,7 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc, | |||
200 | static struct ip_vs_conn * | 224 | static struct ip_vs_conn * |
201 | ip_vs_sched_persist(struct ip_vs_service *svc, | 225 | ip_vs_sched_persist(struct ip_vs_service *svc, |
202 | struct sk_buff *skb, | 226 | struct sk_buff *skb, |
203 | __be16 ports[2]) | 227 | __be16 src_port, __be16 dst_port, int *ignored) |
204 | { | 228 | { |
205 | struct ip_vs_conn *cp = NULL; | 229 | struct ip_vs_conn *cp = NULL; |
206 | struct ip_vs_iphdr iph; | 230 | struct ip_vs_iphdr iph; |
@@ -224,8 +248,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc, | |||
224 | 248 | ||
225 | IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u " | 249 | IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u " |
226 | "mnet %s\n", | 250 | "mnet %s\n", |
227 | IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(ports[0]), | 251 | IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(src_port), |
228 | IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(ports[1]), | 252 | IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(dst_port), |
229 | IP_VS_DBG_ADDR(svc->af, &snet)); | 253 | IP_VS_DBG_ADDR(svc->af, &snet)); |
230 | 254 | ||
231 | /* | 255 | /* |
@@ -247,14 +271,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc, | |||
247 | const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) }; | 271 | const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) }; |
248 | __be16 vport = 0; | 272 | __be16 vport = 0; |
249 | 273 | ||
250 | if (ports[1] == svc->port) { | 274 | if (dst_port == svc->port) { |
251 | /* non-FTP template: | 275 | /* non-FTP template: |
252 | * <protocol, caddr, 0, vaddr, vport, daddr, dport> | 276 | * <protocol, caddr, 0, vaddr, vport, daddr, dport> |
253 | * FTP template: | 277 | * FTP template: |
254 | * <protocol, caddr, 0, vaddr, 0, daddr, 0> | 278 | * <protocol, caddr, 0, vaddr, 0, daddr, 0> |
255 | */ | 279 | */ |
256 | if (svc->port != FTPPORT) | 280 | if (svc->port != FTPPORT) |
257 | vport = ports[1]; | 281 | vport = dst_port; |
258 | } else { | 282 | } else { |
259 | /* Note: persistent fwmark-based services and | 283 | /* Note: persistent fwmark-based services and |
260 | * persistent port zero service are handled here. | 284 | * persistent port zero service are handled here. |
@@ -268,24 +292,31 @@ ip_vs_sched_persist(struct ip_vs_service *svc, | |||
268 | vaddr = &fwmark; | 292 | vaddr = &fwmark; |
269 | } | 293 | } |
270 | } | 294 | } |
271 | ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0, | 295 | /* return *ignored = -1 so NF_DROP can be used */ |
272 | vaddr, vport, ¶m); | 296 | if (ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0, |
297 | vaddr, vport, ¶m) < 0) { | ||
298 | *ignored = -1; | ||
299 | return NULL; | ||
300 | } | ||
273 | } | 301 | } |
274 | 302 | ||
275 | /* Check if a template already exists */ | 303 | /* Check if a template already exists */ |
276 | ct = ip_vs_ct_in_get(¶m); | 304 | ct = ip_vs_ct_in_get(¶m); |
277 | if (!ct || !ip_vs_check_template(ct)) { | 305 | if (!ct || !ip_vs_check_template(ct)) { |
278 | /* No template found or the dest of the connection | 306 | /* |
307 | * No template found or the dest of the connection | ||
279 | * template is not available. | 308 | * template is not available. |
309 | * return *ignored=0 i.e. ICMP and NF_DROP | ||
280 | */ | 310 | */ |
281 | dest = svc->scheduler->schedule(svc, skb); | 311 | dest = svc->scheduler->schedule(svc, skb); |
282 | if (!dest) { | 312 | if (!dest) { |
283 | IP_VS_DBG(1, "p-schedule: no dest found.\n"); | 313 | IP_VS_DBG(1, "p-schedule: no dest found.\n"); |
284 | kfree(param.pe_data); | 314 | kfree(param.pe_data); |
315 | *ignored = 0; | ||
285 | return NULL; | 316 | return NULL; |
286 | } | 317 | } |
287 | 318 | ||
288 | if (ports[1] == svc->port && svc->port != FTPPORT) | 319 | if (dst_port == svc->port && svc->port != FTPPORT) |
289 | dport = dest->port; | 320 | dport = dest->port; |
290 | 321 | ||
291 | /* Create a template | 322 | /* Create a template |
@@ -293,9 +324,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc, | |||
293 | * and thus param.pe_data will be destroyed | 324 | * and thus param.pe_data will be destroyed |
294 | * when the template expires */ | 325 | * when the template expires */ |
295 | ct = ip_vs_conn_new(¶m, &dest->addr, dport, | 326 | ct = ip_vs_conn_new(¶m, &dest->addr, dport, |
296 | IP_VS_CONN_F_TEMPLATE, dest); | 327 | IP_VS_CONN_F_TEMPLATE, dest, skb->mark); |
297 | if (ct == NULL) { | 328 | if (ct == NULL) { |
298 | kfree(param.pe_data); | 329 | kfree(param.pe_data); |
330 | *ignored = -1; | ||
299 | return NULL; | 331 | return NULL; |
300 | } | 332 | } |
301 | 333 | ||
@@ -306,7 +338,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, | |||
306 | kfree(param.pe_data); | 338 | kfree(param.pe_data); |
307 | } | 339 | } |
308 | 340 | ||
309 | dport = ports[1]; | 341 | dport = dst_port; |
310 | if (dport == svc->port && dest->port) | 342 | if (dport == svc->port && dest->port) |
311 | dport = dest->port; | 343 | dport = dest->port; |
312 | 344 | ||
@@ -317,11 +349,13 @@ ip_vs_sched_persist(struct ip_vs_service *svc, | |||
317 | /* | 349 | /* |
318 | * Create a new connection according to the template | 350 | * Create a new connection according to the template |
319 | */ | 351 | */ |
320 | ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, ports[0], | 352 | ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, &iph.saddr, |
321 | &iph.daddr, ports[1], ¶m); | 353 | src_port, &iph.daddr, dst_port, ¶m); |
322 | cp = ip_vs_conn_new(¶m, &dest->addr, dport, flags, dest); | 354 | |
355 | cp = ip_vs_conn_new(¶m, &dest->addr, dport, flags, dest, skb->mark); | ||
323 | if (cp == NULL) { | 356 | if (cp == NULL) { |
324 | ip_vs_conn_put(ct); | 357 | ip_vs_conn_put(ct); |
358 | *ignored = -1; | ||
325 | return NULL; | 359 | return NULL; |
326 | } | 360 | } |
327 | 361 | ||
@@ -341,11 +375,27 @@ ip_vs_sched_persist(struct ip_vs_service *svc, | |||
341 | * It selects a server according to the virtual service, and | 375 | * It selects a server according to the virtual service, and |
342 | * creates a connection entry. | 376 | * creates a connection entry. |
343 | * Protocols supported: TCP, UDP | 377 | * Protocols supported: TCP, UDP |
378 | * | ||
379 | * Usage of *ignored | ||
380 | * | ||
381 | * 1 : protocol tried to schedule (eg. on SYN), found svc but the | ||
382 | * svc/scheduler decides that this packet should be accepted with | ||
383 | * NF_ACCEPT because it must not be scheduled. | ||
384 | * | ||
385 | * 0 : scheduler can not find destination, so try bypass or | ||
386 | * return ICMP and then NF_DROP (ip_vs_leave). | ||
387 | * | ||
388 | * -1 : scheduler tried to schedule but fatal error occurred, eg. | ||
389 | * ip_vs_conn_new failure (ENOMEM) or ip_vs_sip_fill_param | ||
390 | * failure such as missing Call-ID, ENOMEM on skb_linearize | ||
391 | * or pe_data. In this case we should return NF_DROP without | ||
392 | * any attempts to send ICMP with ip_vs_leave. | ||
344 | */ | 393 | */ |
345 | struct ip_vs_conn * | 394 | struct ip_vs_conn * |
346 | ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, | 395 | ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, |
347 | struct ip_vs_protocol *pp, int *ignored) | 396 | struct ip_vs_proto_data *pd, int *ignored) |
348 | { | 397 | { |
398 | struct ip_vs_protocol *pp = pd->pp; | ||
349 | struct ip_vs_conn *cp = NULL; | 399 | struct ip_vs_conn *cp = NULL; |
350 | struct ip_vs_iphdr iph; | 400 | struct ip_vs_iphdr iph; |
351 | struct ip_vs_dest *dest; | 401 | struct ip_vs_dest *dest; |
@@ -371,12 +421,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, | |||
371 | } | 421 | } |
372 | 422 | ||
373 | /* | 423 | /* |
374 | * Do not schedule replies from local real server. It is risky | 424 | * Do not schedule replies from local real server. |
375 | * for fwmark services but mostly for persistent services. | ||
376 | */ | 425 | */ |
377 | if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) && | 426 | if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) && |
378 | (svc->flags & IP_VS_SVC_F_PERSISTENT || svc->fwmark) && | 427 | (cp = pp->conn_in_get(svc->af, skb, &iph, iph.len, 1))) { |
379 | (cp = pp->conn_in_get(svc->af, skb, pp, &iph, iph.len, 1))) { | ||
380 | IP_VS_DBG_PKT(12, svc->af, pp, skb, 0, | 428 | IP_VS_DBG_PKT(12, svc->af, pp, skb, 0, |
381 | "Not scheduling reply for existing connection"); | 429 | "Not scheduling reply for existing connection"); |
382 | __ip_vs_conn_put(cp); | 430 | __ip_vs_conn_put(cp); |
@@ -386,10 +434,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, | |||
386 | /* | 434 | /* |
387 | * Persistent service | 435 | * Persistent service |
388 | */ | 436 | */ |
389 | if (svc->flags & IP_VS_SVC_F_PERSISTENT) { | 437 | if (svc->flags & IP_VS_SVC_F_PERSISTENT) |
390 | *ignored = 0; | 438 | return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored); |
391 | return ip_vs_sched_persist(svc, skb, pptr); | 439 | |
392 | } | 440 | *ignored = 0; |
393 | 441 | ||
394 | /* | 442 | /* |
395 | * Non-persistent service | 443 | * Non-persistent service |
@@ -402,8 +450,6 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, | |||
402 | return NULL; | 450 | return NULL; |
403 | } | 451 | } |
404 | 452 | ||
405 | *ignored = 0; | ||
406 | |||
407 | dest = svc->scheduler->schedule(svc, skb); | 453 | dest = svc->scheduler->schedule(svc, skb); |
408 | if (dest == NULL) { | 454 | if (dest == NULL) { |
409 | IP_VS_DBG(1, "Schedule: no dest found.\n"); | 455 | IP_VS_DBG(1, "Schedule: no dest found.\n"); |
@@ -419,13 +465,17 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, | |||
419 | */ | 465 | */ |
420 | { | 466 | { |
421 | struct ip_vs_conn_param p; | 467 | struct ip_vs_conn_param p; |
422 | ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, | 468 | |
423 | pptr[0], &iph.daddr, pptr[1], &p); | 469 | ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, |
470 | &iph.saddr, pptr[0], &iph.daddr, pptr[1], | ||
471 | &p); | ||
424 | cp = ip_vs_conn_new(&p, &dest->addr, | 472 | cp = ip_vs_conn_new(&p, &dest->addr, |
425 | dest->port ? dest->port : pptr[1], | 473 | dest->port ? dest->port : pptr[1], |
426 | flags, dest); | 474 | flags, dest, skb->mark); |
427 | if (!cp) | 475 | if (!cp) { |
476 | *ignored = -1; | ||
428 | return NULL; | 477 | return NULL; |
478 | } | ||
429 | } | 479 | } |
430 | 480 | ||
431 | IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u " | 481 | IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u " |
@@ -447,11 +497,14 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, | |||
447 | * no destination is available for a new connection. | 497 | * no destination is available for a new connection. |
448 | */ | 498 | */ |
449 | int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, | 499 | int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, |
450 | struct ip_vs_protocol *pp) | 500 | struct ip_vs_proto_data *pd) |
451 | { | 501 | { |
502 | struct net *net; | ||
503 | struct netns_ipvs *ipvs; | ||
452 | __be16 _ports[2], *pptr; | 504 | __be16 _ports[2], *pptr; |
453 | struct ip_vs_iphdr iph; | 505 | struct ip_vs_iphdr iph; |
454 | int unicast; | 506 | int unicast; |
507 | |||
455 | ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); | 508 | ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); |
456 | 509 | ||
457 | pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); | 510 | pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); |
@@ -459,18 +512,20 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, | |||
459 | ip_vs_service_put(svc); | 512 | ip_vs_service_put(svc); |
460 | return NF_DROP; | 513 | return NF_DROP; |
461 | } | 514 | } |
515 | net = skb_net(skb); | ||
462 | 516 | ||
463 | #ifdef CONFIG_IP_VS_IPV6 | 517 | #ifdef CONFIG_IP_VS_IPV6 |
464 | if (svc->af == AF_INET6) | 518 | if (svc->af == AF_INET6) |
465 | unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST; | 519 | unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST; |
466 | else | 520 | else |
467 | #endif | 521 | #endif |
468 | unicast = (inet_addr_type(&init_net, iph.daddr.ip) == RTN_UNICAST); | 522 | unicast = (inet_addr_type(net, iph.daddr.ip) == RTN_UNICAST); |
469 | 523 | ||
470 | /* if it is fwmark-based service, the cache_bypass sysctl is up | 524 | /* if it is fwmark-based service, the cache_bypass sysctl is up |
471 | and the destination is a non-local unicast, then create | 525 | and the destination is a non-local unicast, then create |
472 | a cache_bypass connection entry */ | 526 | a cache_bypass connection entry */ |
473 | if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) { | 527 | ipvs = net_ipvs(net); |
528 | if (ipvs->sysctl_cache_bypass && svc->fwmark && unicast) { | ||
474 | int ret, cs; | 529 | int ret, cs; |
475 | struct ip_vs_conn *cp; | 530 | struct ip_vs_conn *cp; |
476 | unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && | 531 | unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && |
@@ -484,12 +539,12 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, | |||
484 | IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__); | 539 | IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__); |
485 | { | 540 | { |
486 | struct ip_vs_conn_param p; | 541 | struct ip_vs_conn_param p; |
487 | ip_vs_conn_fill_param(svc->af, iph.protocol, | 542 | ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, |
488 | &iph.saddr, pptr[0], | 543 | &iph.saddr, pptr[0], |
489 | &iph.daddr, pptr[1], &p); | 544 | &iph.daddr, pptr[1], &p); |
490 | cp = ip_vs_conn_new(&p, &daddr, 0, | 545 | cp = ip_vs_conn_new(&p, &daddr, 0, |
491 | IP_VS_CONN_F_BYPASS | flags, | 546 | IP_VS_CONN_F_BYPASS | flags, |
492 | NULL); | 547 | NULL, skb->mark); |
493 | if (!cp) | 548 | if (!cp) |
494 | return NF_DROP; | 549 | return NF_DROP; |
495 | } | 550 | } |
@@ -498,10 +553,10 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, | |||
498 | ip_vs_in_stats(cp, skb); | 553 | ip_vs_in_stats(cp, skb); |
499 | 554 | ||
500 | /* set state */ | 555 | /* set state */ |
501 | cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp); | 556 | cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd); |
502 | 557 | ||
503 | /* transmit the first SYN packet */ | 558 | /* transmit the first SYN packet */ |
504 | ret = cp->packet_xmit(skb, cp, pp); | 559 | ret = cp->packet_xmit(skb, cp, pd->pp); |
505 | /* do not touch skb anymore */ | 560 | /* do not touch skb anymore */ |
506 | 561 | ||
507 | atomic_inc(&cp->in_pkts); | 562 | atomic_inc(&cp->in_pkts); |
@@ -682,6 +737,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb, | |||
682 | struct ip_vs_protocol *pp, | 737 | struct ip_vs_protocol *pp, |
683 | unsigned int offset, unsigned int ihl) | 738 | unsigned int offset, unsigned int ihl) |
684 | { | 739 | { |
740 | struct netns_ipvs *ipvs; | ||
685 | unsigned int verdict = NF_DROP; | 741 | unsigned int verdict = NF_DROP; |
686 | 742 | ||
687 | if (IP_VS_FWD_METHOD(cp) != 0) { | 743 | if (IP_VS_FWD_METHOD(cp) != 0) { |
@@ -703,6 +759,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb, | |||
703 | if (!skb_make_writable(skb, offset)) | 759 | if (!skb_make_writable(skb, offset)) |
704 | goto out; | 760 | goto out; |
705 | 761 | ||
762 | ipvs = net_ipvs(skb_net(skb)); | ||
763 | |||
706 | #ifdef CONFIG_IP_VS_IPV6 | 764 | #ifdef CONFIG_IP_VS_IPV6 |
707 | if (af == AF_INET6) | 765 | if (af == AF_INET6) |
708 | ip_vs_nat_icmp_v6(skb, pp, cp, 1); | 766 | ip_vs_nat_icmp_v6(skb, pp, cp, 1); |
@@ -712,11 +770,11 @@ static int handle_response_icmp(int af, struct sk_buff *skb, | |||
712 | 770 | ||
713 | #ifdef CONFIG_IP_VS_IPV6 | 771 | #ifdef CONFIG_IP_VS_IPV6 |
714 | if (af == AF_INET6) { | 772 | if (af == AF_INET6) { |
715 | if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0) | 773 | if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0) |
716 | goto out; | 774 | goto out; |
717 | } else | 775 | } else |
718 | #endif | 776 | #endif |
719 | if ((sysctl_ip_vs_snat_reroute || | 777 | if ((ipvs->sysctl_snat_reroute || |
720 | skb_rtable(skb)->rt_flags & RTCF_LOCAL) && | 778 | skb_rtable(skb)->rt_flags & RTCF_LOCAL) && |
721 | ip_route_me_harder(skb, RTN_LOCAL) != 0) | 779 | ip_route_me_harder(skb, RTN_LOCAL) != 0) |
722 | goto out; | 780 | goto out; |
@@ -808,7 +866,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related, | |||
808 | 866 | ||
809 | ip_vs_fill_iphdr(AF_INET, cih, &ciph); | 867 | ip_vs_fill_iphdr(AF_INET, cih, &ciph); |
810 | /* The embedded headers contain source and dest in reverse order */ | 868 | /* The embedded headers contain source and dest in reverse order */ |
811 | cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1); | 869 | cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1); |
812 | if (!cp) | 870 | if (!cp) |
813 | return NF_ACCEPT; | 871 | return NF_ACCEPT; |
814 | 872 | ||
@@ -885,7 +943,7 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related, | |||
885 | 943 | ||
886 | ip_vs_fill_iphdr(AF_INET6, cih, &ciph); | 944 | ip_vs_fill_iphdr(AF_INET6, cih, &ciph); |
887 | /* The embedded headers contain source and dest in reverse order */ | 945 | /* The embedded headers contain source and dest in reverse order */ |
888 | cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1); | 946 | cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1); |
889 | if (!cp) | 947 | if (!cp) |
890 | return NF_ACCEPT; | 948 | return NF_ACCEPT; |
891 | 949 | ||
@@ -924,9 +982,12 @@ static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len) | |||
924 | * Used for NAT and local client. | 982 | * Used for NAT and local client. |
925 | */ | 983 | */ |
926 | static unsigned int | 984 | static unsigned int |
927 | handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, | 985 | handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, |
928 | struct ip_vs_conn *cp, int ihl) | 986 | struct ip_vs_conn *cp, int ihl) |
929 | { | 987 | { |
988 | struct ip_vs_protocol *pp = pd->pp; | ||
989 | struct netns_ipvs *ipvs; | ||
990 | |||
930 | IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet"); | 991 | IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet"); |
931 | 992 | ||
932 | if (!skb_make_writable(skb, ihl)) | 993 | if (!skb_make_writable(skb, ihl)) |
@@ -961,13 +1022,15 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
961 | * if it came from this machine itself. So re-compute | 1022 | * if it came from this machine itself. So re-compute |
962 | * the routing information. | 1023 | * the routing information. |
963 | */ | 1024 | */ |
1025 | ipvs = net_ipvs(skb_net(skb)); | ||
1026 | |||
964 | #ifdef CONFIG_IP_VS_IPV6 | 1027 | #ifdef CONFIG_IP_VS_IPV6 |
965 | if (af == AF_INET6) { | 1028 | if (af == AF_INET6) { |
966 | if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0) | 1029 | if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0) |
967 | goto drop; | 1030 | goto drop; |
968 | } else | 1031 | } else |
969 | #endif | 1032 | #endif |
970 | if ((sysctl_ip_vs_snat_reroute || | 1033 | if ((ipvs->sysctl_snat_reroute || |
971 | skb_rtable(skb)->rt_flags & RTCF_LOCAL) && | 1034 | skb_rtable(skb)->rt_flags & RTCF_LOCAL) && |
972 | ip_route_me_harder(skb, RTN_LOCAL) != 0) | 1035 | ip_route_me_harder(skb, RTN_LOCAL) != 0) |
973 | goto drop; | 1036 | goto drop; |
@@ -975,7 +1038,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
975 | IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT"); | 1038 | IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT"); |
976 | 1039 | ||
977 | ip_vs_out_stats(cp, skb); | 1040 | ip_vs_out_stats(cp, skb); |
978 | ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); | 1041 | ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pd); |
979 | skb->ipvs_property = 1; | 1042 | skb->ipvs_property = 1; |
980 | if (!(cp->flags & IP_VS_CONN_F_NFCT)) | 1043 | if (!(cp->flags & IP_VS_CONN_F_NFCT)) |
981 | ip_vs_notrack(skb); | 1044 | ip_vs_notrack(skb); |
@@ -999,9 +1062,12 @@ drop: | |||
999 | static unsigned int | 1062 | static unsigned int |
1000 | ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) | 1063 | ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) |
1001 | { | 1064 | { |
1065 | struct net *net = NULL; | ||
1002 | struct ip_vs_iphdr iph; | 1066 | struct ip_vs_iphdr iph; |
1003 | struct ip_vs_protocol *pp; | 1067 | struct ip_vs_protocol *pp; |
1068 | struct ip_vs_proto_data *pd; | ||
1004 | struct ip_vs_conn *cp; | 1069 | struct ip_vs_conn *cp; |
1070 | struct netns_ipvs *ipvs; | ||
1005 | 1071 | ||
1006 | EnterFunction(11); | 1072 | EnterFunction(11); |
1007 | 1073 | ||
@@ -1022,6 +1088,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) | |||
1022 | if (unlikely(!skb_dst(skb))) | 1088 | if (unlikely(!skb_dst(skb))) |
1023 | return NF_ACCEPT; | 1089 | return NF_ACCEPT; |
1024 | 1090 | ||
1091 | net = skb_net(skb); | ||
1025 | ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); | 1092 | ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); |
1026 | #ifdef CONFIG_IP_VS_IPV6 | 1093 | #ifdef CONFIG_IP_VS_IPV6 |
1027 | if (af == AF_INET6) { | 1094 | if (af == AF_INET6) { |
@@ -1045,9 +1112,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) | |||
1045 | ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); | 1112 | ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); |
1046 | } | 1113 | } |
1047 | 1114 | ||
1048 | pp = ip_vs_proto_get(iph.protocol); | 1115 | pd = ip_vs_proto_data_get(net, iph.protocol); |
1049 | if (unlikely(!pp)) | 1116 | if (unlikely(!pd)) |
1050 | return NF_ACCEPT; | 1117 | return NF_ACCEPT; |
1118 | pp = pd->pp; | ||
1051 | 1119 | ||
1052 | /* reassemble IP fragments */ | 1120 | /* reassemble IP fragments */ |
1053 | #ifdef CONFIG_IP_VS_IPV6 | 1121 | #ifdef CONFIG_IP_VS_IPV6 |
@@ -1073,11 +1141,12 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) | |||
1073 | /* | 1141 | /* |
1074 | * Check if the packet belongs to an existing entry | 1142 | * Check if the packet belongs to an existing entry |
1075 | */ | 1143 | */ |
1076 | cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0); | 1144 | cp = pp->conn_out_get(af, skb, &iph, iph.len, 0); |
1145 | ipvs = net_ipvs(net); | ||
1077 | 1146 | ||
1078 | if (likely(cp)) | 1147 | if (likely(cp)) |
1079 | return handle_response(af, skb, pp, cp, iph.len); | 1148 | return handle_response(af, skb, pd, cp, iph.len); |
1080 | if (sysctl_ip_vs_nat_icmp_send && | 1149 | if (ipvs->sysctl_nat_icmp_send && |
1081 | (pp->protocol == IPPROTO_TCP || | 1150 | (pp->protocol == IPPROTO_TCP || |
1082 | pp->protocol == IPPROTO_UDP || | 1151 | pp->protocol == IPPROTO_UDP || |
1083 | pp->protocol == IPPROTO_SCTP)) { | 1152 | pp->protocol == IPPROTO_SCTP)) { |
@@ -1087,7 +1156,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) | |||
1087 | sizeof(_ports), _ports); | 1156 | sizeof(_ports), _ports); |
1088 | if (pptr == NULL) | 1157 | if (pptr == NULL) |
1089 | return NF_ACCEPT; /* Not for me */ | 1158 | return NF_ACCEPT; /* Not for me */ |
1090 | if (ip_vs_lookup_real_service(af, iph.protocol, | 1159 | if (ip_vs_lookup_real_service(net, af, iph.protocol, |
1091 | &iph.saddr, | 1160 | &iph.saddr, |
1092 | pptr[0])) { | 1161 | pptr[0])) { |
1093 | /* | 1162 | /* |
@@ -1202,12 +1271,14 @@ ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb, | |||
1202 | static int | 1271 | static int |
1203 | ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) | 1272 | ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) |
1204 | { | 1273 | { |
1274 | struct net *net = NULL; | ||
1205 | struct iphdr *iph; | 1275 | struct iphdr *iph; |
1206 | struct icmphdr _icmph, *ic; | 1276 | struct icmphdr _icmph, *ic; |
1207 | struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ | 1277 | struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ |
1208 | struct ip_vs_iphdr ciph; | 1278 | struct ip_vs_iphdr ciph; |
1209 | struct ip_vs_conn *cp; | 1279 | struct ip_vs_conn *cp; |
1210 | struct ip_vs_protocol *pp; | 1280 | struct ip_vs_protocol *pp; |
1281 | struct ip_vs_proto_data *pd; | ||
1211 | unsigned int offset, ihl, verdict; | 1282 | unsigned int offset, ihl, verdict; |
1212 | union nf_inet_addr snet; | 1283 | union nf_inet_addr snet; |
1213 | 1284 | ||
@@ -1249,9 +1320,11 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) | |||
1249 | if (cih == NULL) | 1320 | if (cih == NULL) |
1250 | return NF_ACCEPT; /* The packet looks wrong, ignore */ | 1321 | return NF_ACCEPT; /* The packet looks wrong, ignore */ |
1251 | 1322 | ||
1252 | pp = ip_vs_proto_get(cih->protocol); | 1323 | net = skb_net(skb); |
1253 | if (!pp) | 1324 | pd = ip_vs_proto_data_get(net, cih->protocol); |
1325 | if (!pd) | ||
1254 | return NF_ACCEPT; | 1326 | return NF_ACCEPT; |
1327 | pp = pd->pp; | ||
1255 | 1328 | ||
1256 | /* Is the embedded protocol header present? */ | 1329 | /* Is the embedded protocol header present? */ |
1257 | if (unlikely(cih->frag_off & htons(IP_OFFSET) && | 1330 | if (unlikely(cih->frag_off & htons(IP_OFFSET) && |
@@ -1265,10 +1338,10 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) | |||
1265 | 1338 | ||
1266 | ip_vs_fill_iphdr(AF_INET, cih, &ciph); | 1339 | ip_vs_fill_iphdr(AF_INET, cih, &ciph); |
1267 | /* The embedded headers contain source and dest in reverse order */ | 1340 | /* The embedded headers contain source and dest in reverse order */ |
1268 | cp = pp->conn_in_get(AF_INET, skb, pp, &ciph, offset, 1); | 1341 | cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, 1); |
1269 | if (!cp) { | 1342 | if (!cp) { |
1270 | /* The packet could also belong to a local client */ | 1343 | /* The packet could also belong to a local client */ |
1271 | cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1); | 1344 | cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1); |
1272 | if (cp) { | 1345 | if (cp) { |
1273 | snet.ip = iph->saddr; | 1346 | snet.ip = iph->saddr; |
1274 | return handle_response_icmp(AF_INET, skb, &snet, | 1347 | return handle_response_icmp(AF_INET, skb, &snet, |
@@ -1312,6 +1385,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) | |||
1312 | static int | 1385 | static int |
1313 | ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) | 1386 | ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) |
1314 | { | 1387 | { |
1388 | struct net *net = NULL; | ||
1315 | struct ipv6hdr *iph; | 1389 | struct ipv6hdr *iph; |
1316 | struct icmp6hdr _icmph, *ic; | 1390 | struct icmp6hdr _icmph, *ic; |
1317 | struct ipv6hdr _ciph, *cih; /* The ip header contained | 1391 | struct ipv6hdr _ciph, *cih; /* The ip header contained |
@@ -1319,6 +1393,7 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) | |||
1319 | struct ip_vs_iphdr ciph; | 1393 | struct ip_vs_iphdr ciph; |
1320 | struct ip_vs_conn *cp; | 1394 | struct ip_vs_conn *cp; |
1321 | struct ip_vs_protocol *pp; | 1395 | struct ip_vs_protocol *pp; |
1396 | struct ip_vs_proto_data *pd; | ||
1322 | unsigned int offset, verdict; | 1397 | unsigned int offset, verdict; |
1323 | union nf_inet_addr snet; | 1398 | union nf_inet_addr snet; |
1324 | struct rt6_info *rt; | 1399 | struct rt6_info *rt; |
@@ -1361,9 +1436,11 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) | |||
1361 | if (cih == NULL) | 1436 | if (cih == NULL) |
1362 | return NF_ACCEPT; /* The packet looks wrong, ignore */ | 1437 | return NF_ACCEPT; /* The packet looks wrong, ignore */ |
1363 | 1438 | ||
1364 | pp = ip_vs_proto_get(cih->nexthdr); | 1439 | net = skb_net(skb); |
1365 | if (!pp) | 1440 | pd = ip_vs_proto_data_get(net, cih->nexthdr); |
1441 | if (!pd) | ||
1366 | return NF_ACCEPT; | 1442 | return NF_ACCEPT; |
1443 | pp = pd->pp; | ||
1367 | 1444 | ||
1368 | /* Is the embedded protocol header present? */ | 1445 | /* Is the embedded protocol header present? */ |
1369 | /* TODO: we don't support fragmentation at the moment anyways */ | 1446 | /* TODO: we don't support fragmentation at the moment anyways */ |
@@ -1377,10 +1454,10 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) | |||
1377 | 1454 | ||
1378 | ip_vs_fill_iphdr(AF_INET6, cih, &ciph); | 1455 | ip_vs_fill_iphdr(AF_INET6, cih, &ciph); |
1379 | /* The embedded headers contain source and dest in reverse order */ | 1456 | /* The embedded headers contain source and dest in reverse order */ |
1380 | cp = pp->conn_in_get(AF_INET6, skb, pp, &ciph, offset, 1); | 1457 | cp = pp->conn_in_get(AF_INET6, skb, &ciph, offset, 1); |
1381 | if (!cp) { | 1458 | if (!cp) { |
1382 | /* The packet could also belong to a local client */ | 1459 | /* The packet could also belong to a local client */ |
1383 | cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1); | 1460 | cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1); |
1384 | if (cp) { | 1461 | if (cp) { |
1385 | ipv6_addr_copy(&snet.in6, &iph->saddr); | 1462 | ipv6_addr_copy(&snet.in6, &iph->saddr); |
1386 | return handle_response_icmp(AF_INET6, skb, &snet, | 1463 | return handle_response_icmp(AF_INET6, skb, &snet, |
@@ -1423,10 +1500,13 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) | |||
1423 | static unsigned int | 1500 | static unsigned int |
1424 | ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) | 1501 | ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) |
1425 | { | 1502 | { |
1503 | struct net *net; | ||
1426 | struct ip_vs_iphdr iph; | 1504 | struct ip_vs_iphdr iph; |
1427 | struct ip_vs_protocol *pp; | 1505 | struct ip_vs_protocol *pp; |
1506 | struct ip_vs_proto_data *pd; | ||
1428 | struct ip_vs_conn *cp; | 1507 | struct ip_vs_conn *cp; |
1429 | int ret, restart, pkts; | 1508 | int ret, restart, pkts; |
1509 | struct netns_ipvs *ipvs; | ||
1430 | 1510 | ||
1431 | /* Already marked as IPVS request or reply? */ | 1511 | /* Already marked as IPVS request or reply? */ |
1432 | if (skb->ipvs_property) | 1512 | if (skb->ipvs_property) |
@@ -1480,20 +1560,21 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) | |||
1480 | ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); | 1560 | ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); |
1481 | } | 1561 | } |
1482 | 1562 | ||
1563 | net = skb_net(skb); | ||
1483 | /* Protocol supported? */ | 1564 | /* Protocol supported? */ |
1484 | pp = ip_vs_proto_get(iph.protocol); | 1565 | pd = ip_vs_proto_data_get(net, iph.protocol); |
1485 | if (unlikely(!pp)) | 1566 | if (unlikely(!pd)) |
1486 | return NF_ACCEPT; | 1567 | return NF_ACCEPT; |
1487 | 1568 | pp = pd->pp; | |
1488 | /* | 1569 | /* |
1489 | * Check if the packet belongs to an existing connection entry | 1570 | * Check if the packet belongs to an existing connection entry |
1490 | */ | 1571 | */ |
1491 | cp = pp->conn_in_get(af, skb, pp, &iph, iph.len, 0); | 1572 | cp = pp->conn_in_get(af, skb, &iph, iph.len, 0); |
1492 | 1573 | ||
1493 | if (unlikely(!cp)) { | 1574 | if (unlikely(!cp)) { |
1494 | int v; | 1575 | int v; |
1495 | 1576 | ||
1496 | if (!pp->conn_schedule(af, skb, pp, &v, &cp)) | 1577 | if (!pp->conn_schedule(af, skb, pd, &v, &cp)) |
1497 | return v; | 1578 | return v; |
1498 | } | 1579 | } |
1499 | 1580 | ||
@@ -1505,12 +1586,13 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) | |||
1505 | } | 1586 | } |
1506 | 1587 | ||
1507 | IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet"); | 1588 | IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet"); |
1508 | 1589 | net = skb_net(skb); | |
1590 | ipvs = net_ipvs(net); | ||
1509 | /* Check the server status */ | 1591 | /* Check the server status */ |
1510 | if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { | 1592 | if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { |
1511 | /* the destination server is not available */ | 1593 | /* the destination server is not available */ |
1512 | 1594 | ||
1513 | if (sysctl_ip_vs_expire_nodest_conn) { | 1595 | if (ipvs->sysctl_expire_nodest_conn) { |
1514 | /* try to expire the connection immediately */ | 1596 | /* try to expire the connection immediately */ |
1515 | ip_vs_conn_expire_now(cp); | 1597 | ip_vs_conn_expire_now(cp); |
1516 | } | 1598 | } |
@@ -1521,7 +1603,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) | |||
1521 | } | 1603 | } |
1522 | 1604 | ||
1523 | ip_vs_in_stats(cp, skb); | 1605 | ip_vs_in_stats(cp, skb); |
1524 | restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp); | 1606 | restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd); |
1525 | if (cp->packet_xmit) | 1607 | if (cp->packet_xmit) |
1526 | ret = cp->packet_xmit(skb, cp, pp); | 1608 | ret = cp->packet_xmit(skb, cp, pp); |
1527 | /* do not touch skb anymore */ | 1609 | /* do not touch skb anymore */ |
@@ -1535,35 +1617,41 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) | |||
1535 | * | 1617 | * |
1536 | * Sync connection if it is about to close to | 1618 | * Sync connection if it is about to close to |
1537 | * encourage the standby servers to update the connections timeout | 1619 | * encourage the standby servers to update the connections timeout |
1620 | * | ||
1621 | * For ONE_PKT let ip_vs_sync_conn() do the filter work. | ||
1538 | */ | 1622 | */ |
1539 | pkts = atomic_add_return(1, &cp->in_pkts); | 1623 | |
1540 | if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) && | 1624 | if (cp->flags & IP_VS_CONN_F_ONE_PACKET) |
1625 | pkts = ipvs->sysctl_sync_threshold[0]; | ||
1626 | else | ||
1627 | pkts = atomic_add_return(1, &cp->in_pkts); | ||
1628 | |||
1629 | if ((ipvs->sync_state & IP_VS_STATE_MASTER) && | ||
1541 | cp->protocol == IPPROTO_SCTP) { | 1630 | cp->protocol == IPPROTO_SCTP) { |
1542 | if ((cp->state == IP_VS_SCTP_S_ESTABLISHED && | 1631 | if ((cp->state == IP_VS_SCTP_S_ESTABLISHED && |
1543 | (pkts % sysctl_ip_vs_sync_threshold[1] | 1632 | (pkts % ipvs->sysctl_sync_threshold[1] |
1544 | == sysctl_ip_vs_sync_threshold[0])) || | 1633 | == ipvs->sysctl_sync_threshold[0])) || |
1545 | (cp->old_state != cp->state && | 1634 | (cp->old_state != cp->state && |
1546 | ((cp->state == IP_VS_SCTP_S_CLOSED) || | 1635 | ((cp->state == IP_VS_SCTP_S_CLOSED) || |
1547 | (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) || | 1636 | (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) || |
1548 | (cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) { | 1637 | (cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) { |
1549 | ip_vs_sync_conn(cp); | 1638 | ip_vs_sync_conn(net, cp); |
1550 | goto out; | 1639 | goto out; |
1551 | } | 1640 | } |
1552 | } | 1641 | } |
1553 | 1642 | ||
1554 | /* Keep this block last: TCP and others with pp->num_states <= 1 */ | 1643 | /* Keep this block last: TCP and others with pp->num_states <= 1 */ |
1555 | else if (af == AF_INET && | 1644 | else if ((ipvs->sync_state & IP_VS_STATE_MASTER) && |
1556 | (ip_vs_sync_state & IP_VS_STATE_MASTER) && | ||
1557 | (((cp->protocol != IPPROTO_TCP || | 1645 | (((cp->protocol != IPPROTO_TCP || |
1558 | cp->state == IP_VS_TCP_S_ESTABLISHED) && | 1646 | cp->state == IP_VS_TCP_S_ESTABLISHED) && |
1559 | (pkts % sysctl_ip_vs_sync_threshold[1] | 1647 | (pkts % ipvs->sysctl_sync_threshold[1] |
1560 | == sysctl_ip_vs_sync_threshold[0])) || | 1648 | == ipvs->sysctl_sync_threshold[0])) || |
1561 | ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) && | 1649 | ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) && |
1562 | ((cp->state == IP_VS_TCP_S_FIN_WAIT) || | 1650 | ((cp->state == IP_VS_TCP_S_FIN_WAIT) || |
1563 | (cp->state == IP_VS_TCP_S_CLOSE) || | 1651 | (cp->state == IP_VS_TCP_S_CLOSE) || |
1564 | (cp->state == IP_VS_TCP_S_CLOSE_WAIT) || | 1652 | (cp->state == IP_VS_TCP_S_CLOSE_WAIT) || |
1565 | (cp->state == IP_VS_TCP_S_TIME_WAIT))))) | 1653 | (cp->state == IP_VS_TCP_S_TIME_WAIT))))) |
1566 | ip_vs_sync_conn(cp); | 1654 | ip_vs_sync_conn(net, cp); |
1567 | out: | 1655 | out: |
1568 | cp->old_state = cp->state; | 1656 | cp->old_state = cp->state; |
1569 | 1657 | ||
@@ -1782,7 +1870,41 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { | |||
1782 | }, | 1870 | }, |
1783 | #endif | 1871 | #endif |
1784 | }; | 1872 | }; |
1873 | /* | ||
1874 | * Initialize IP Virtual Server netns mem. | ||
1875 | */ | ||
1876 | static int __net_init __ip_vs_init(struct net *net) | ||
1877 | { | ||
1878 | struct netns_ipvs *ipvs; | ||
1879 | |||
1880 | ipvs = net_generic(net, ip_vs_net_id); | ||
1881 | if (ipvs == NULL) { | ||
1882 | pr_err("%s(): no memory.\n", __func__); | ||
1883 | return -ENOMEM; | ||
1884 | } | ||
1885 | ipvs->net = net; | ||
1886 | /* Counters used for creating unique names */ | ||
1887 | ipvs->gen = atomic_read(&ipvs_netns_cnt); | ||
1888 | atomic_inc(&ipvs_netns_cnt); | ||
1889 | net->ipvs = ipvs; | ||
1890 | printk(KERN_INFO "IPVS: Creating netns size=%lu id=%d\n", | ||
1891 | sizeof(struct netns_ipvs), ipvs->gen); | ||
1892 | return 0; | ||
1893 | } | ||
1894 | |||
1895 | static void __net_exit __ip_vs_cleanup(struct net *net) | ||
1896 | { | ||
1897 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
1785 | 1898 | ||
1899 | IP_VS_DBG(10, "ipvs netns %d released\n", ipvs->gen); | ||
1900 | } | ||
1901 | |||
1902 | static struct pernet_operations ipvs_core_ops = { | ||
1903 | .init = __ip_vs_init, | ||
1904 | .exit = __ip_vs_cleanup, | ||
1905 | .id = &ip_vs_net_id, | ||
1906 | .size = sizeof(struct netns_ipvs), | ||
1907 | }; | ||
1786 | 1908 | ||
1787 | /* | 1909 | /* |
1788 | * Initialize IP Virtual Server | 1910 | * Initialize IP Virtual Server |
@@ -1791,8 +1913,11 @@ static int __init ip_vs_init(void) | |||
1791 | { | 1913 | { |
1792 | int ret; | 1914 | int ret; |
1793 | 1915 | ||
1794 | ip_vs_estimator_init(); | 1916 | ret = register_pernet_subsys(&ipvs_core_ops); /* Alloc ip_vs struct */ |
1917 | if (ret < 0) | ||
1918 | return ret; | ||
1795 | 1919 | ||
1920 | ip_vs_estimator_init(); | ||
1796 | ret = ip_vs_control_init(); | 1921 | ret = ip_vs_control_init(); |
1797 | if (ret < 0) { | 1922 | if (ret < 0) { |
1798 | pr_err("can't setup control.\n"); | 1923 | pr_err("can't setup control.\n"); |
@@ -1813,15 +1938,23 @@ static int __init ip_vs_init(void) | |||
1813 | goto cleanup_app; | 1938 | goto cleanup_app; |
1814 | } | 1939 | } |
1815 | 1940 | ||
1941 | ret = ip_vs_sync_init(); | ||
1942 | if (ret < 0) { | ||
1943 | pr_err("can't setup sync data.\n"); | ||
1944 | goto cleanup_conn; | ||
1945 | } | ||
1946 | |||
1816 | ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); | 1947 | ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); |
1817 | if (ret < 0) { | 1948 | if (ret < 0) { |
1818 | pr_err("can't register hooks.\n"); | 1949 | pr_err("can't register hooks.\n"); |
1819 | goto cleanup_conn; | 1950 | goto cleanup_sync; |
1820 | } | 1951 | } |
1821 | 1952 | ||
1822 | pr_info("ipvs loaded.\n"); | 1953 | pr_info("ipvs loaded.\n"); |
1823 | return ret; | 1954 | return ret; |
1824 | 1955 | ||
1956 | cleanup_sync: | ||
1957 | ip_vs_sync_cleanup(); | ||
1825 | cleanup_conn: | 1958 | cleanup_conn: |
1826 | ip_vs_conn_cleanup(); | 1959 | ip_vs_conn_cleanup(); |
1827 | cleanup_app: | 1960 | cleanup_app: |
@@ -1831,17 +1964,20 @@ static int __init ip_vs_init(void) | |||
1831 | ip_vs_control_cleanup(); | 1964 | ip_vs_control_cleanup(); |
1832 | cleanup_estimator: | 1965 | cleanup_estimator: |
1833 | ip_vs_estimator_cleanup(); | 1966 | ip_vs_estimator_cleanup(); |
1967 | unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */ | ||
1834 | return ret; | 1968 | return ret; |
1835 | } | 1969 | } |
1836 | 1970 | ||
1837 | static void __exit ip_vs_cleanup(void) | 1971 | static void __exit ip_vs_cleanup(void) |
1838 | { | 1972 | { |
1839 | nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); | 1973 | nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); |
1974 | ip_vs_sync_cleanup(); | ||
1840 | ip_vs_conn_cleanup(); | 1975 | ip_vs_conn_cleanup(); |
1841 | ip_vs_app_cleanup(); | 1976 | ip_vs_app_cleanup(); |
1842 | ip_vs_protocol_cleanup(); | 1977 | ip_vs_protocol_cleanup(); |
1843 | ip_vs_control_cleanup(); | 1978 | ip_vs_control_cleanup(); |
1844 | ip_vs_estimator_cleanup(); | 1979 | ip_vs_estimator_cleanup(); |
1980 | unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */ | ||
1845 | pr_info("ipvs unloaded.\n"); | 1981 | pr_info("ipvs unloaded.\n"); |
1846 | } | 1982 | } |
1847 | 1983 | ||
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 22f7ad5101ab..09ca2ce2f2b7 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c | |||
@@ -38,6 +38,7 @@ | |||
38 | #include <linux/mutex.h> | 38 | #include <linux/mutex.h> |
39 | 39 | ||
40 | #include <net/net_namespace.h> | 40 | #include <net/net_namespace.h> |
41 | #include <linux/nsproxy.h> | ||
41 | #include <net/ip.h> | 42 | #include <net/ip.h> |
42 | #ifdef CONFIG_IP_VS_IPV6 | 43 | #ifdef CONFIG_IP_VS_IPV6 |
43 | #include <net/ipv6.h> | 44 | #include <net/ipv6.h> |
@@ -57,42 +58,7 @@ static DEFINE_MUTEX(__ip_vs_mutex); | |||
57 | /* lock for service table */ | 58 | /* lock for service table */ |
58 | static DEFINE_RWLOCK(__ip_vs_svc_lock); | 59 | static DEFINE_RWLOCK(__ip_vs_svc_lock); |
59 | 60 | ||
60 | /* lock for table with the real services */ | ||
61 | static DEFINE_RWLOCK(__ip_vs_rs_lock); | ||
62 | |||
63 | /* lock for state and timeout tables */ | ||
64 | static DEFINE_SPINLOCK(ip_vs_securetcp_lock); | ||
65 | |||
66 | /* lock for drop entry handling */ | ||
67 | static DEFINE_SPINLOCK(__ip_vs_dropentry_lock); | ||
68 | |||
69 | /* lock for drop packet handling */ | ||
70 | static DEFINE_SPINLOCK(__ip_vs_droppacket_lock); | ||
71 | |||
72 | /* 1/rate drop and drop-entry variables */ | ||
73 | int ip_vs_drop_rate = 0; | ||
74 | int ip_vs_drop_counter = 0; | ||
75 | static atomic_t ip_vs_dropentry = ATOMIC_INIT(0); | ||
76 | |||
77 | /* number of virtual services */ | ||
78 | static int ip_vs_num_services = 0; | ||
79 | |||
80 | /* sysctl variables */ | 61 | /* sysctl variables */ |
81 | static int sysctl_ip_vs_drop_entry = 0; | ||
82 | static int sysctl_ip_vs_drop_packet = 0; | ||
83 | static int sysctl_ip_vs_secure_tcp = 0; | ||
84 | static int sysctl_ip_vs_amemthresh = 1024; | ||
85 | static int sysctl_ip_vs_am_droprate = 10; | ||
86 | int sysctl_ip_vs_cache_bypass = 0; | ||
87 | int sysctl_ip_vs_expire_nodest_conn = 0; | ||
88 | int sysctl_ip_vs_expire_quiescent_template = 0; | ||
89 | int sysctl_ip_vs_sync_threshold[2] = { 3, 50 }; | ||
90 | int sysctl_ip_vs_nat_icmp_send = 0; | ||
91 | #ifdef CONFIG_IP_VS_NFCT | ||
92 | int sysctl_ip_vs_conntrack; | ||
93 | #endif | ||
94 | int sysctl_ip_vs_snat_reroute = 1; | ||
95 | |||
96 | 62 | ||
97 | #ifdef CONFIG_IP_VS_DEBUG | 63 | #ifdef CONFIG_IP_VS_DEBUG |
98 | static int sysctl_ip_vs_debug_level = 0; | 64 | static int sysctl_ip_vs_debug_level = 0; |
@@ -105,7 +71,8 @@ int ip_vs_get_debug_level(void) | |||
105 | 71 | ||
106 | #ifdef CONFIG_IP_VS_IPV6 | 72 | #ifdef CONFIG_IP_VS_IPV6 |
107 | /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */ | 73 | /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */ |
108 | static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr) | 74 | static int __ip_vs_addr_is_local_v6(struct net *net, |
75 | const struct in6_addr *addr) | ||
109 | { | 76 | { |
110 | struct rt6_info *rt; | 77 | struct rt6_info *rt; |
111 | struct flowi fl = { | 78 | struct flowi fl = { |
@@ -114,7 +81,7 @@ static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr) | |||
114 | .fl6_src = { .s6_addr32 = {0, 0, 0, 0} }, | 81 | .fl6_src = { .s6_addr32 = {0, 0, 0, 0} }, |
115 | }; | 82 | }; |
116 | 83 | ||
117 | rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl); | 84 | rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl); |
118 | if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK)) | 85 | if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK)) |
119 | return 1; | 86 | return 1; |
120 | 87 | ||
@@ -125,7 +92,7 @@ static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr) | |||
125 | * update_defense_level is called from keventd and from sysctl, | 92 | * update_defense_level is called from keventd and from sysctl, |
126 | * so it needs to protect itself from softirqs | 93 | * so it needs to protect itself from softirqs |
127 | */ | 94 | */ |
128 | static void update_defense_level(void) | 95 | static void update_defense_level(struct netns_ipvs *ipvs) |
129 | { | 96 | { |
130 | struct sysinfo i; | 97 | struct sysinfo i; |
131 | static int old_secure_tcp = 0; | 98 | static int old_secure_tcp = 0; |
@@ -141,73 +108,73 @@ static void update_defense_level(void) | |||
141 | /* si_swapinfo(&i); */ | 108 | /* si_swapinfo(&i); */ |
142 | /* availmem = availmem - (i.totalswap - i.freeswap); */ | 109 | /* availmem = availmem - (i.totalswap - i.freeswap); */ |
143 | 110 | ||
144 | nomem = (availmem < sysctl_ip_vs_amemthresh); | 111 | nomem = (availmem < ipvs->sysctl_amemthresh); |
145 | 112 | ||
146 | local_bh_disable(); | 113 | local_bh_disable(); |
147 | 114 | ||
148 | /* drop_entry */ | 115 | /* drop_entry */ |
149 | spin_lock(&__ip_vs_dropentry_lock); | 116 | spin_lock(&ipvs->dropentry_lock); |
150 | switch (sysctl_ip_vs_drop_entry) { | 117 | switch (ipvs->sysctl_drop_entry) { |
151 | case 0: | 118 | case 0: |
152 | atomic_set(&ip_vs_dropentry, 0); | 119 | atomic_set(&ipvs->dropentry, 0); |
153 | break; | 120 | break; |
154 | case 1: | 121 | case 1: |
155 | if (nomem) { | 122 | if (nomem) { |
156 | atomic_set(&ip_vs_dropentry, 1); | 123 | atomic_set(&ipvs->dropentry, 1); |
157 | sysctl_ip_vs_drop_entry = 2; | 124 | ipvs->sysctl_drop_entry = 2; |
158 | } else { | 125 | } else { |
159 | atomic_set(&ip_vs_dropentry, 0); | 126 | atomic_set(&ipvs->dropentry, 0); |
160 | } | 127 | } |
161 | break; | 128 | break; |
162 | case 2: | 129 | case 2: |
163 | if (nomem) { | 130 | if (nomem) { |
164 | atomic_set(&ip_vs_dropentry, 1); | 131 | atomic_set(&ipvs->dropentry, 1); |
165 | } else { | 132 | } else { |
166 | atomic_set(&ip_vs_dropentry, 0); | 133 | atomic_set(&ipvs->dropentry, 0); |
167 | sysctl_ip_vs_drop_entry = 1; | 134 | ipvs->sysctl_drop_entry = 1; |
168 | }; | 135 | }; |
169 | break; | 136 | break; |
170 | case 3: | 137 | case 3: |
171 | atomic_set(&ip_vs_dropentry, 1); | 138 | atomic_set(&ipvs->dropentry, 1); |
172 | break; | 139 | break; |
173 | } | 140 | } |
174 | spin_unlock(&__ip_vs_dropentry_lock); | 141 | spin_unlock(&ipvs->dropentry_lock); |
175 | 142 | ||
176 | /* drop_packet */ | 143 | /* drop_packet */ |
177 | spin_lock(&__ip_vs_droppacket_lock); | 144 | spin_lock(&ipvs->droppacket_lock); |
178 | switch (sysctl_ip_vs_drop_packet) { | 145 | switch (ipvs->sysctl_drop_packet) { |
179 | case 0: | 146 | case 0: |
180 | ip_vs_drop_rate = 0; | 147 | ipvs->drop_rate = 0; |
181 | break; | 148 | break; |
182 | case 1: | 149 | case 1: |
183 | if (nomem) { | 150 | if (nomem) { |
184 | ip_vs_drop_rate = ip_vs_drop_counter | 151 | ipvs->drop_rate = ipvs->drop_counter |
185 | = sysctl_ip_vs_amemthresh / | 152 | = ipvs->sysctl_amemthresh / |
186 | (sysctl_ip_vs_amemthresh-availmem); | 153 | (ipvs->sysctl_amemthresh-availmem); |
187 | sysctl_ip_vs_drop_packet = 2; | 154 | ipvs->sysctl_drop_packet = 2; |
188 | } else { | 155 | } else { |
189 | ip_vs_drop_rate = 0; | 156 | ipvs->drop_rate = 0; |
190 | } | 157 | } |
191 | break; | 158 | break; |
192 | case 2: | 159 | case 2: |
193 | if (nomem) { | 160 | if (nomem) { |
194 | ip_vs_drop_rate = ip_vs_drop_counter | 161 | ipvs->drop_rate = ipvs->drop_counter |
195 | = sysctl_ip_vs_amemthresh / | 162 | = ipvs->sysctl_amemthresh / |
196 | (sysctl_ip_vs_amemthresh-availmem); | 163 | (ipvs->sysctl_amemthresh-availmem); |
197 | } else { | 164 | } else { |
198 | ip_vs_drop_rate = 0; | 165 | ipvs->drop_rate = 0; |
199 | sysctl_ip_vs_drop_packet = 1; | 166 | ipvs->sysctl_drop_packet = 1; |
200 | } | 167 | } |
201 | break; | 168 | break; |
202 | case 3: | 169 | case 3: |
203 | ip_vs_drop_rate = sysctl_ip_vs_am_droprate; | 170 | ipvs->drop_rate = ipvs->sysctl_am_droprate; |
204 | break; | 171 | break; |
205 | } | 172 | } |
206 | spin_unlock(&__ip_vs_droppacket_lock); | 173 | spin_unlock(&ipvs->droppacket_lock); |
207 | 174 | ||
208 | /* secure_tcp */ | 175 | /* secure_tcp */ |
209 | spin_lock(&ip_vs_securetcp_lock); | 176 | spin_lock(&ipvs->securetcp_lock); |
210 | switch (sysctl_ip_vs_secure_tcp) { | 177 | switch (ipvs->sysctl_secure_tcp) { |
211 | case 0: | 178 | case 0: |
212 | if (old_secure_tcp >= 2) | 179 | if (old_secure_tcp >= 2) |
213 | to_change = 0; | 180 | to_change = 0; |
@@ -216,7 +183,7 @@ static void update_defense_level(void) | |||
216 | if (nomem) { | 183 | if (nomem) { |
217 | if (old_secure_tcp < 2) | 184 | if (old_secure_tcp < 2) |
218 | to_change = 1; | 185 | to_change = 1; |
219 | sysctl_ip_vs_secure_tcp = 2; | 186 | ipvs->sysctl_secure_tcp = 2; |
220 | } else { | 187 | } else { |
221 | if (old_secure_tcp >= 2) | 188 | if (old_secure_tcp >= 2) |
222 | to_change = 0; | 189 | to_change = 0; |
@@ -229,7 +196,7 @@ static void update_defense_level(void) | |||
229 | } else { | 196 | } else { |
230 | if (old_secure_tcp >= 2) | 197 | if (old_secure_tcp >= 2) |
231 | to_change = 0; | 198 | to_change = 0; |
232 | sysctl_ip_vs_secure_tcp = 1; | 199 | ipvs->sysctl_secure_tcp = 1; |
233 | } | 200 | } |
234 | break; | 201 | break; |
235 | case 3: | 202 | case 3: |
@@ -237,10 +204,11 @@ static void update_defense_level(void) | |||
237 | to_change = 1; | 204 | to_change = 1; |
238 | break; | 205 | break; |
239 | } | 206 | } |
240 | old_secure_tcp = sysctl_ip_vs_secure_tcp; | 207 | old_secure_tcp = ipvs->sysctl_secure_tcp; |
241 | if (to_change >= 0) | 208 | if (to_change >= 0) |
242 | ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1); | 209 | ip_vs_protocol_timeout_change(ipvs, |
243 | spin_unlock(&ip_vs_securetcp_lock); | 210 | ipvs->sysctl_secure_tcp > 1); |
211 | spin_unlock(&ipvs->securetcp_lock); | ||
244 | 212 | ||
245 | local_bh_enable(); | 213 | local_bh_enable(); |
246 | } | 214 | } |
@@ -250,16 +218,16 @@ static void update_defense_level(void) | |||
250 | * Timer for checking the defense | 218 | * Timer for checking the defense |
251 | */ | 219 | */ |
252 | #define DEFENSE_TIMER_PERIOD 1*HZ | 220 | #define DEFENSE_TIMER_PERIOD 1*HZ |
253 | static void defense_work_handler(struct work_struct *work); | ||
254 | static DECLARE_DELAYED_WORK(defense_work, defense_work_handler); | ||
255 | 221 | ||
256 | static void defense_work_handler(struct work_struct *work) | 222 | static void defense_work_handler(struct work_struct *work) |
257 | { | 223 | { |
258 | update_defense_level(); | 224 | struct netns_ipvs *ipvs = |
259 | if (atomic_read(&ip_vs_dropentry)) | 225 | container_of(work, struct netns_ipvs, defense_work.work); |
260 | ip_vs_random_dropentry(); | ||
261 | 226 | ||
262 | schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD); | 227 | update_defense_level(ipvs); |
228 | if (atomic_read(&ipvs->dropentry)) | ||
229 | ip_vs_random_dropentry(ipvs->net); | ||
230 | schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD); | ||
263 | } | 231 | } |
264 | 232 | ||
265 | int | 233 | int |
@@ -287,33 +255,13 @@ static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE]; | |||
287 | /* the service table hashed by fwmark */ | 255 | /* the service table hashed by fwmark */ |
288 | static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; | 256 | static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; |
289 | 257 | ||
290 | /* | ||
291 | * Hash table: for real service lookups | ||
292 | */ | ||
293 | #define IP_VS_RTAB_BITS 4 | ||
294 | #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS) | ||
295 | #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1) | ||
296 | |||
297 | static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE]; | ||
298 | |||
299 | /* | ||
300 | * Trash for destinations | ||
301 | */ | ||
302 | static LIST_HEAD(ip_vs_dest_trash); | ||
303 | |||
304 | /* | ||
305 | * FTP & NULL virtual service counters | ||
306 | */ | ||
307 | static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0); | ||
308 | static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0); | ||
309 | |||
310 | 258 | ||
311 | /* | 259 | /* |
312 | * Returns hash value for virtual service | 260 | * Returns hash value for virtual service |
313 | */ | 261 | */ |
314 | static __inline__ unsigned | 262 | static inline unsigned |
315 | ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr, | 263 | ip_vs_svc_hashkey(struct net *net, int af, unsigned proto, |
316 | __be16 port) | 264 | const union nf_inet_addr *addr, __be16 port) |
317 | { | 265 | { |
318 | register unsigned porth = ntohs(port); | 266 | register unsigned porth = ntohs(port); |
319 | __be32 addr_fold = addr->ip; | 267 | __be32 addr_fold = addr->ip; |
@@ -323,6 +271,7 @@ ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr, | |||
323 | addr_fold = addr->ip6[0]^addr->ip6[1]^ | 271 | addr_fold = addr->ip6[0]^addr->ip6[1]^ |
324 | addr->ip6[2]^addr->ip6[3]; | 272 | addr->ip6[2]^addr->ip6[3]; |
325 | #endif | 273 | #endif |
274 | addr_fold ^= ((size_t)net>>8); | ||
326 | 275 | ||
327 | return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth) | 276 | return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth) |
328 | & IP_VS_SVC_TAB_MASK; | 277 | & IP_VS_SVC_TAB_MASK; |
@@ -331,13 +280,13 @@ ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr, | |||
331 | /* | 280 | /* |
332 | * Returns hash value of fwmark for virtual service lookup | 281 | * Returns hash value of fwmark for virtual service lookup |
333 | */ | 282 | */ |
334 | static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark) | 283 | static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark) |
335 | { | 284 | { |
336 | return fwmark & IP_VS_SVC_TAB_MASK; | 285 | return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK; |
337 | } | 286 | } |
338 | 287 | ||
339 | /* | 288 | /* |
340 | * Hashes a service in the ip_vs_svc_table by <proto,addr,port> | 289 | * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port> |
341 | * or in the ip_vs_svc_fwm_table by fwmark. | 290 | * or in the ip_vs_svc_fwm_table by fwmark. |
342 | * Should be called with locked tables. | 291 | * Should be called with locked tables. |
343 | */ | 292 | */ |
@@ -353,16 +302,16 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc) | |||
353 | 302 | ||
354 | if (svc->fwmark == 0) { | 303 | if (svc->fwmark == 0) { |
355 | /* | 304 | /* |
356 | * Hash it by <protocol,addr,port> in ip_vs_svc_table | 305 | * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table |
357 | */ | 306 | */ |
358 | hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr, | 307 | hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol, |
359 | svc->port); | 308 | &svc->addr, svc->port); |
360 | list_add(&svc->s_list, &ip_vs_svc_table[hash]); | 309 | list_add(&svc->s_list, &ip_vs_svc_table[hash]); |
361 | } else { | 310 | } else { |
362 | /* | 311 | /* |
363 | * Hash it by fwmark in ip_vs_svc_fwm_table | 312 | * Hash it by fwmark in svc_fwm_table |
364 | */ | 313 | */ |
365 | hash = ip_vs_svc_fwm_hashkey(svc->fwmark); | 314 | hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark); |
366 | list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]); | 315 | list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]); |
367 | } | 316 | } |
368 | 317 | ||
@@ -374,7 +323,7 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc) | |||
374 | 323 | ||
375 | 324 | ||
376 | /* | 325 | /* |
377 | * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table. | 326 | * Unhashes a service from svc_table / svc_fwm_table. |
378 | * Should be called with locked tables. | 327 | * Should be called with locked tables. |
379 | */ | 328 | */ |
380 | static int ip_vs_svc_unhash(struct ip_vs_service *svc) | 329 | static int ip_vs_svc_unhash(struct ip_vs_service *svc) |
@@ -386,10 +335,10 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc) | |||
386 | } | 335 | } |
387 | 336 | ||
388 | if (svc->fwmark == 0) { | 337 | if (svc->fwmark == 0) { |
389 | /* Remove it from the ip_vs_svc_table table */ | 338 | /* Remove it from the svc_table table */ |
390 | list_del(&svc->s_list); | 339 | list_del(&svc->s_list); |
391 | } else { | 340 | } else { |
392 | /* Remove it from the ip_vs_svc_fwm_table table */ | 341 | /* Remove it from the svc_fwm_table table */ |
393 | list_del(&svc->f_list); | 342 | list_del(&svc->f_list); |
394 | } | 343 | } |
395 | 344 | ||
@@ -400,23 +349,24 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc) | |||
400 | 349 | ||
401 | 350 | ||
402 | /* | 351 | /* |
403 | * Get service by {proto,addr,port} in the service table. | 352 | * Get service by {netns, proto,addr,port} in the service table. |
404 | */ | 353 | */ |
405 | static inline struct ip_vs_service * | 354 | static inline struct ip_vs_service * |
406 | __ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr, | 355 | __ip_vs_service_find(struct net *net, int af, __u16 protocol, |
407 | __be16 vport) | 356 | const union nf_inet_addr *vaddr, __be16 vport) |
408 | { | 357 | { |
409 | unsigned hash; | 358 | unsigned hash; |
410 | struct ip_vs_service *svc; | 359 | struct ip_vs_service *svc; |
411 | 360 | ||
412 | /* Check for "full" addressed entries */ | 361 | /* Check for "full" addressed entries */ |
413 | hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport); | 362 | hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport); |
414 | 363 | ||
415 | list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){ | 364 | list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){ |
416 | if ((svc->af == af) | 365 | if ((svc->af == af) |
417 | && ip_vs_addr_equal(af, &svc->addr, vaddr) | 366 | && ip_vs_addr_equal(af, &svc->addr, vaddr) |
418 | && (svc->port == vport) | 367 | && (svc->port == vport) |
419 | && (svc->protocol == protocol)) { | 368 | && (svc->protocol == protocol) |
369 | && net_eq(svc->net, net)) { | ||
420 | /* HIT */ | 370 | /* HIT */ |
421 | return svc; | 371 | return svc; |
422 | } | 372 | } |
@@ -430,16 +380,17 @@ __ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr, | |||
430 | * Get service by {fwmark} in the service table. | 380 | * Get service by {fwmark} in the service table. |
431 | */ | 381 | */ |
432 | static inline struct ip_vs_service * | 382 | static inline struct ip_vs_service * |
433 | __ip_vs_svc_fwm_find(int af, __u32 fwmark) | 383 | __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark) |
434 | { | 384 | { |
435 | unsigned hash; | 385 | unsigned hash; |
436 | struct ip_vs_service *svc; | 386 | struct ip_vs_service *svc; |
437 | 387 | ||
438 | /* Check for fwmark addressed entries */ | 388 | /* Check for fwmark addressed entries */ |
439 | hash = ip_vs_svc_fwm_hashkey(fwmark); | 389 | hash = ip_vs_svc_fwm_hashkey(net, fwmark); |
440 | 390 | ||
441 | list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) { | 391 | list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) { |
442 | if (svc->fwmark == fwmark && svc->af == af) { | 392 | if (svc->fwmark == fwmark && svc->af == af |
393 | && net_eq(svc->net, net)) { | ||
443 | /* HIT */ | 394 | /* HIT */ |
444 | return svc; | 395 | return svc; |
445 | } | 396 | } |
@@ -449,42 +400,44 @@ __ip_vs_svc_fwm_find(int af, __u32 fwmark) | |||
449 | } | 400 | } |
450 | 401 | ||
451 | struct ip_vs_service * | 402 | struct ip_vs_service * |
452 | ip_vs_service_get(int af, __u32 fwmark, __u16 protocol, | 403 | ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, |
453 | const union nf_inet_addr *vaddr, __be16 vport) | 404 | const union nf_inet_addr *vaddr, __be16 vport) |
454 | { | 405 | { |
455 | struct ip_vs_service *svc; | 406 | struct ip_vs_service *svc; |
407 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
456 | 408 | ||
457 | read_lock(&__ip_vs_svc_lock); | 409 | read_lock(&__ip_vs_svc_lock); |
458 | 410 | ||
459 | /* | 411 | /* |
460 | * Check the table hashed by fwmark first | 412 | * Check the table hashed by fwmark first |
461 | */ | 413 | */ |
462 | if (fwmark && (svc = __ip_vs_svc_fwm_find(af, fwmark))) | 414 | svc = __ip_vs_svc_fwm_find(net, af, fwmark); |
415 | if (fwmark && svc) | ||
463 | goto out; | 416 | goto out; |
464 | 417 | ||
465 | /* | 418 | /* |
466 | * Check the table hashed by <protocol,addr,port> | 419 | * Check the table hashed by <protocol,addr,port> |
467 | * for "full" addressed entries | 420 | * for "full" addressed entries |
468 | */ | 421 | */ |
469 | svc = __ip_vs_service_find(af, protocol, vaddr, vport); | 422 | svc = __ip_vs_service_find(net, af, protocol, vaddr, vport); |
470 | 423 | ||
471 | if (svc == NULL | 424 | if (svc == NULL |
472 | && protocol == IPPROTO_TCP | 425 | && protocol == IPPROTO_TCP |
473 | && atomic_read(&ip_vs_ftpsvc_counter) | 426 | && atomic_read(&ipvs->ftpsvc_counter) |
474 | && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) { | 427 | && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) { |
475 | /* | 428 | /* |
476 | * Check if ftp service entry exists, the packet | 429 | * Check if ftp service entry exists, the packet |
477 | * might belong to FTP data connections. | 430 | * might belong to FTP data connections. |
478 | */ | 431 | */ |
479 | svc = __ip_vs_service_find(af, protocol, vaddr, FTPPORT); | 432 | svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT); |
480 | } | 433 | } |
481 | 434 | ||
482 | if (svc == NULL | 435 | if (svc == NULL |
483 | && atomic_read(&ip_vs_nullsvc_counter)) { | 436 | && atomic_read(&ipvs->nullsvc_counter)) { |
484 | /* | 437 | /* |
485 | * Check if the catch-all port (port zero) exists | 438 | * Check if the catch-all port (port zero) exists |
486 | */ | 439 | */ |
487 | svc = __ip_vs_service_find(af, protocol, vaddr, 0); | 440 | svc = __ip_vs_service_find(net, af, protocol, vaddr, 0); |
488 | } | 441 | } |
489 | 442 | ||
490 | out: | 443 | out: |
@@ -519,6 +472,7 @@ __ip_vs_unbind_svc(struct ip_vs_dest *dest) | |||
519 | svc->fwmark, | 472 | svc->fwmark, |
520 | IP_VS_DBG_ADDR(svc->af, &svc->addr), | 473 | IP_VS_DBG_ADDR(svc->af, &svc->addr), |
521 | ntohs(svc->port), atomic_read(&svc->usecnt)); | 474 | ntohs(svc->port), atomic_read(&svc->usecnt)); |
475 | free_percpu(svc->stats.cpustats); | ||
522 | kfree(svc); | 476 | kfree(svc); |
523 | } | 477 | } |
524 | } | 478 | } |
@@ -545,10 +499,10 @@ static inline unsigned ip_vs_rs_hashkey(int af, | |||
545 | } | 499 | } |
546 | 500 | ||
547 | /* | 501 | /* |
548 | * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>. | 502 | * Hashes ip_vs_dest in rs_table by <proto,addr,port>. |
549 | * should be called with locked tables. | 503 | * should be called with locked tables. |
550 | */ | 504 | */ |
551 | static int ip_vs_rs_hash(struct ip_vs_dest *dest) | 505 | static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest) |
552 | { | 506 | { |
553 | unsigned hash; | 507 | unsigned hash; |
554 | 508 | ||
@@ -562,19 +516,19 @@ static int ip_vs_rs_hash(struct ip_vs_dest *dest) | |||
562 | */ | 516 | */ |
563 | hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port); | 517 | hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port); |
564 | 518 | ||
565 | list_add(&dest->d_list, &ip_vs_rtable[hash]); | 519 | list_add(&dest->d_list, &ipvs->rs_table[hash]); |
566 | 520 | ||
567 | return 1; | 521 | return 1; |
568 | } | 522 | } |
569 | 523 | ||
570 | /* | 524 | /* |
571 | * UNhashes ip_vs_dest from ip_vs_rtable. | 525 | * UNhashes ip_vs_dest from rs_table. |
572 | * should be called with locked tables. | 526 | * should be called with locked tables. |
573 | */ | 527 | */ |
574 | static int ip_vs_rs_unhash(struct ip_vs_dest *dest) | 528 | static int ip_vs_rs_unhash(struct ip_vs_dest *dest) |
575 | { | 529 | { |
576 | /* | 530 | /* |
577 | * Remove it from the ip_vs_rtable table. | 531 | * Remove it from the rs_table table. |
578 | */ | 532 | */ |
579 | if (!list_empty(&dest->d_list)) { | 533 | if (!list_empty(&dest->d_list)) { |
580 | list_del(&dest->d_list); | 534 | list_del(&dest->d_list); |
@@ -588,10 +542,11 @@ static int ip_vs_rs_unhash(struct ip_vs_dest *dest) | |||
588 | * Lookup real service by <proto,addr,port> in the real service table. | 542 | * Lookup real service by <proto,addr,port> in the real service table. |
589 | */ | 543 | */ |
590 | struct ip_vs_dest * | 544 | struct ip_vs_dest * |
591 | ip_vs_lookup_real_service(int af, __u16 protocol, | 545 | ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol, |
592 | const union nf_inet_addr *daddr, | 546 | const union nf_inet_addr *daddr, |
593 | __be16 dport) | 547 | __be16 dport) |
594 | { | 548 | { |
549 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
595 | unsigned hash; | 550 | unsigned hash; |
596 | struct ip_vs_dest *dest; | 551 | struct ip_vs_dest *dest; |
597 | 552 | ||
@@ -601,19 +556,19 @@ ip_vs_lookup_real_service(int af, __u16 protocol, | |||
601 | */ | 556 | */ |
602 | hash = ip_vs_rs_hashkey(af, daddr, dport); | 557 | hash = ip_vs_rs_hashkey(af, daddr, dport); |
603 | 558 | ||
604 | read_lock(&__ip_vs_rs_lock); | 559 | read_lock(&ipvs->rs_lock); |
605 | list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) { | 560 | list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) { |
606 | if ((dest->af == af) | 561 | if ((dest->af == af) |
607 | && ip_vs_addr_equal(af, &dest->addr, daddr) | 562 | && ip_vs_addr_equal(af, &dest->addr, daddr) |
608 | && (dest->port == dport) | 563 | && (dest->port == dport) |
609 | && ((dest->protocol == protocol) || | 564 | && ((dest->protocol == protocol) || |
610 | dest->vfwmark)) { | 565 | dest->vfwmark)) { |
611 | /* HIT */ | 566 | /* HIT */ |
612 | read_unlock(&__ip_vs_rs_lock); | 567 | read_unlock(&ipvs->rs_lock); |
613 | return dest; | 568 | return dest; |
614 | } | 569 | } |
615 | } | 570 | } |
616 | read_unlock(&__ip_vs_rs_lock); | 571 | read_unlock(&ipvs->rs_lock); |
617 | 572 | ||
618 | return NULL; | 573 | return NULL; |
619 | } | 574 | } |
@@ -652,15 +607,16 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, | |||
652 | * ip_vs_lookup_real_service() looked promissing, but | 607 | * ip_vs_lookup_real_service() looked promissing, but |
653 | * seems not working as expected. | 608 | * seems not working as expected. |
654 | */ | 609 | */ |
655 | struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr, | 610 | struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af, |
611 | const union nf_inet_addr *daddr, | ||
656 | __be16 dport, | 612 | __be16 dport, |
657 | const union nf_inet_addr *vaddr, | 613 | const union nf_inet_addr *vaddr, |
658 | __be16 vport, __u16 protocol) | 614 | __be16 vport, __u16 protocol, __u32 fwmark) |
659 | { | 615 | { |
660 | struct ip_vs_dest *dest; | 616 | struct ip_vs_dest *dest; |
661 | struct ip_vs_service *svc; | 617 | struct ip_vs_service *svc; |
662 | 618 | ||
663 | svc = ip_vs_service_get(af, 0, protocol, vaddr, vport); | 619 | svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport); |
664 | if (!svc) | 620 | if (!svc) |
665 | return NULL; | 621 | return NULL; |
666 | dest = ip_vs_lookup_dest(svc, daddr, dport); | 622 | dest = ip_vs_lookup_dest(svc, daddr, dport); |
@@ -685,11 +641,12 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, | |||
685 | __be16 dport) | 641 | __be16 dport) |
686 | { | 642 | { |
687 | struct ip_vs_dest *dest, *nxt; | 643 | struct ip_vs_dest *dest, *nxt; |
644 | struct netns_ipvs *ipvs = net_ipvs(svc->net); | ||
688 | 645 | ||
689 | /* | 646 | /* |
690 | * Find the destination in trash | 647 | * Find the destination in trash |
691 | */ | 648 | */ |
692 | list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) { | 649 | list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) { |
693 | IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, " | 650 | IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, " |
694 | "dest->refcnt=%d\n", | 651 | "dest->refcnt=%d\n", |
695 | dest->vfwmark, | 652 | dest->vfwmark, |
@@ -720,6 +677,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, | |||
720 | list_del(&dest->n_list); | 677 | list_del(&dest->n_list); |
721 | ip_vs_dst_reset(dest); | 678 | ip_vs_dst_reset(dest); |
722 | __ip_vs_unbind_svc(dest); | 679 | __ip_vs_unbind_svc(dest); |
680 | free_percpu(dest->stats.cpustats); | ||
723 | kfree(dest); | 681 | kfree(dest); |
724 | } | 682 | } |
725 | } | 683 | } |
@@ -737,14 +695,16 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, | |||
737 | * are expired, and the refcnt of each destination in the trash must | 695 | * are expired, and the refcnt of each destination in the trash must |
738 | * be 1, so we simply release them here. | 696 | * be 1, so we simply release them here. |
739 | */ | 697 | */ |
740 | static void ip_vs_trash_cleanup(void) | 698 | static void ip_vs_trash_cleanup(struct net *net) |
741 | { | 699 | { |
742 | struct ip_vs_dest *dest, *nxt; | 700 | struct ip_vs_dest *dest, *nxt; |
701 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
743 | 702 | ||
744 | list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) { | 703 | list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) { |
745 | list_del(&dest->n_list); | 704 | list_del(&dest->n_list); |
746 | ip_vs_dst_reset(dest); | 705 | ip_vs_dst_reset(dest); |
747 | __ip_vs_unbind_svc(dest); | 706 | __ip_vs_unbind_svc(dest); |
707 | free_percpu(dest->stats.cpustats); | ||
748 | kfree(dest); | 708 | kfree(dest); |
749 | } | 709 | } |
750 | } | 710 | } |
@@ -768,6 +728,7 @@ static void | |||
768 | __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, | 728 | __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, |
769 | struct ip_vs_dest_user_kern *udest, int add) | 729 | struct ip_vs_dest_user_kern *udest, int add) |
770 | { | 730 | { |
731 | struct netns_ipvs *ipvs = net_ipvs(svc->net); | ||
771 | int conn_flags; | 732 | int conn_flags; |
772 | 733 | ||
773 | /* set the weight and the flags */ | 734 | /* set the weight and the flags */ |
@@ -780,12 +741,12 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, | |||
780 | conn_flags |= IP_VS_CONN_F_NOOUTPUT; | 741 | conn_flags |= IP_VS_CONN_F_NOOUTPUT; |
781 | } else { | 742 | } else { |
782 | /* | 743 | /* |
783 | * Put the real service in ip_vs_rtable if not present. | 744 | * Put the real service in rs_table if not present. |
784 | * For now only for NAT! | 745 | * For now only for NAT! |
785 | */ | 746 | */ |
786 | write_lock_bh(&__ip_vs_rs_lock); | 747 | write_lock_bh(&ipvs->rs_lock); |
787 | ip_vs_rs_hash(dest); | 748 | ip_vs_rs_hash(ipvs, dest); |
788 | write_unlock_bh(&__ip_vs_rs_lock); | 749 | write_unlock_bh(&ipvs->rs_lock); |
789 | } | 750 | } |
790 | atomic_set(&dest->conn_flags, conn_flags); | 751 | atomic_set(&dest->conn_flags, conn_flags); |
791 | 752 | ||
@@ -813,7 +774,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, | |||
813 | spin_unlock(&dest->dst_lock); | 774 | spin_unlock(&dest->dst_lock); |
814 | 775 | ||
815 | if (add) | 776 | if (add) |
816 | ip_vs_new_estimator(&dest->stats); | 777 | ip_vs_new_estimator(svc->net, &dest->stats); |
817 | 778 | ||
818 | write_lock_bh(&__ip_vs_svc_lock); | 779 | write_lock_bh(&__ip_vs_svc_lock); |
819 | 780 | ||
@@ -850,12 +811,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, | |||
850 | atype = ipv6_addr_type(&udest->addr.in6); | 811 | atype = ipv6_addr_type(&udest->addr.in6); |
851 | if ((!(atype & IPV6_ADDR_UNICAST) || | 812 | if ((!(atype & IPV6_ADDR_UNICAST) || |
852 | atype & IPV6_ADDR_LINKLOCAL) && | 813 | atype & IPV6_ADDR_LINKLOCAL) && |
853 | !__ip_vs_addr_is_local_v6(&udest->addr.in6)) | 814 | !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6)) |
854 | return -EINVAL; | 815 | return -EINVAL; |
855 | } else | 816 | } else |
856 | #endif | 817 | #endif |
857 | { | 818 | { |
858 | atype = inet_addr_type(&init_net, udest->addr.ip); | 819 | atype = inet_addr_type(svc->net, udest->addr.ip); |
859 | if (atype != RTN_LOCAL && atype != RTN_UNICAST) | 820 | if (atype != RTN_LOCAL && atype != RTN_UNICAST) |
860 | return -EINVAL; | 821 | return -EINVAL; |
861 | } | 822 | } |
@@ -865,6 +826,11 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, | |||
865 | pr_err("%s(): no memory.\n", __func__); | 826 | pr_err("%s(): no memory.\n", __func__); |
866 | return -ENOMEM; | 827 | return -ENOMEM; |
867 | } | 828 | } |
829 | dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); | ||
830 | if (!dest->stats.cpustats) { | ||
831 | pr_err("%s() alloc_percpu failed\n", __func__); | ||
832 | goto err_alloc; | ||
833 | } | ||
868 | 834 | ||
869 | dest->af = svc->af; | 835 | dest->af = svc->af; |
870 | dest->protocol = svc->protocol; | 836 | dest->protocol = svc->protocol; |
@@ -888,6 +854,10 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, | |||
888 | 854 | ||
889 | LeaveFunction(2); | 855 | LeaveFunction(2); |
890 | return 0; | 856 | return 0; |
857 | |||
858 | err_alloc: | ||
859 | kfree(dest); | ||
860 | return -ENOMEM; | ||
891 | } | 861 | } |
892 | 862 | ||
893 | 863 | ||
@@ -1006,16 +976,18 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) | |||
1006 | /* | 976 | /* |
1007 | * Delete a destination (must be already unlinked from the service) | 977 | * Delete a destination (must be already unlinked from the service) |
1008 | */ | 978 | */ |
1009 | static void __ip_vs_del_dest(struct ip_vs_dest *dest) | 979 | static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest) |
1010 | { | 980 | { |
1011 | ip_vs_kill_estimator(&dest->stats); | 981 | struct netns_ipvs *ipvs = net_ipvs(net); |
982 | |||
983 | ip_vs_kill_estimator(net, &dest->stats); | ||
1012 | 984 | ||
1013 | /* | 985 | /* |
1014 | * Remove it from the d-linked list with the real services. | 986 | * Remove it from the d-linked list with the real services. |
1015 | */ | 987 | */ |
1016 | write_lock_bh(&__ip_vs_rs_lock); | 988 | write_lock_bh(&ipvs->rs_lock); |
1017 | ip_vs_rs_unhash(dest); | 989 | ip_vs_rs_unhash(dest); |
1018 | write_unlock_bh(&__ip_vs_rs_lock); | 990 | write_unlock_bh(&ipvs->rs_lock); |
1019 | 991 | ||
1020 | /* | 992 | /* |
1021 | * Decrease the refcnt of the dest, and free the dest | 993 | * Decrease the refcnt of the dest, and free the dest |
@@ -1034,6 +1006,7 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest) | |||
1034 | and only one user context can update virtual service at a | 1006 | and only one user context can update virtual service at a |
1035 | time, so the operation here is OK */ | 1007 | time, so the operation here is OK */ |
1036 | atomic_dec(&dest->svc->refcnt); | 1008 | atomic_dec(&dest->svc->refcnt); |
1009 | free_percpu(dest->stats.cpustats); | ||
1037 | kfree(dest); | 1010 | kfree(dest); |
1038 | } else { | 1011 | } else { |
1039 | IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, " | 1012 | IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, " |
@@ -1041,7 +1014,7 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest) | |||
1041 | IP_VS_DBG_ADDR(dest->af, &dest->addr), | 1014 | IP_VS_DBG_ADDR(dest->af, &dest->addr), |
1042 | ntohs(dest->port), | 1015 | ntohs(dest->port), |
1043 | atomic_read(&dest->refcnt)); | 1016 | atomic_read(&dest->refcnt)); |
1044 | list_add(&dest->n_list, &ip_vs_dest_trash); | 1017 | list_add(&dest->n_list, &ipvs->dest_trash); |
1045 | atomic_inc(&dest->refcnt); | 1018 | atomic_inc(&dest->refcnt); |
1046 | } | 1019 | } |
1047 | } | 1020 | } |
@@ -1105,7 +1078,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) | |||
1105 | /* | 1078 | /* |
1106 | * Delete the destination | 1079 | * Delete the destination |
1107 | */ | 1080 | */ |
1108 | __ip_vs_del_dest(dest); | 1081 | __ip_vs_del_dest(svc->net, dest); |
1109 | 1082 | ||
1110 | LeaveFunction(2); | 1083 | LeaveFunction(2); |
1111 | 1084 | ||
@@ -1117,13 +1090,14 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) | |||
1117 | * Add a service into the service hash table | 1090 | * Add a service into the service hash table |
1118 | */ | 1091 | */ |
1119 | static int | 1092 | static int |
1120 | ip_vs_add_service(struct ip_vs_service_user_kern *u, | 1093 | ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, |
1121 | struct ip_vs_service **svc_p) | 1094 | struct ip_vs_service **svc_p) |
1122 | { | 1095 | { |
1123 | int ret = 0; | 1096 | int ret = 0; |
1124 | struct ip_vs_scheduler *sched = NULL; | 1097 | struct ip_vs_scheduler *sched = NULL; |
1125 | struct ip_vs_pe *pe = NULL; | 1098 | struct ip_vs_pe *pe = NULL; |
1126 | struct ip_vs_service *svc = NULL; | 1099 | struct ip_vs_service *svc = NULL; |
1100 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
1127 | 1101 | ||
1128 | /* increase the module use count */ | 1102 | /* increase the module use count */ |
1129 | ip_vs_use_count_inc(); | 1103 | ip_vs_use_count_inc(); |
@@ -1137,7 +1111,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u, | |||
1137 | } | 1111 | } |
1138 | 1112 | ||
1139 | if (u->pe_name && *u->pe_name) { | 1113 | if (u->pe_name && *u->pe_name) { |
1140 | pe = ip_vs_pe_get(u->pe_name); | 1114 | pe = ip_vs_pe_getbyname(u->pe_name); |
1141 | if (pe == NULL) { | 1115 | if (pe == NULL) { |
1142 | pr_info("persistence engine module ip_vs_pe_%s " | 1116 | pr_info("persistence engine module ip_vs_pe_%s " |
1143 | "not found\n", u->pe_name); | 1117 | "not found\n", u->pe_name); |
@@ -1159,6 +1133,11 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u, | |||
1159 | ret = -ENOMEM; | 1133 | ret = -ENOMEM; |
1160 | goto out_err; | 1134 | goto out_err; |
1161 | } | 1135 | } |
1136 | svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); | ||
1137 | if (!svc->stats.cpustats) { | ||
1138 | pr_err("%s() alloc_percpu failed\n", __func__); | ||
1139 | goto out_err; | ||
1140 | } | ||
1162 | 1141 | ||
1163 | /* I'm the first user of the service */ | 1142 | /* I'm the first user of the service */ |
1164 | atomic_set(&svc->usecnt, 0); | 1143 | atomic_set(&svc->usecnt, 0); |
@@ -1172,6 +1151,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u, | |||
1172 | svc->flags = u->flags; | 1151 | svc->flags = u->flags; |
1173 | svc->timeout = u->timeout * HZ; | 1152 | svc->timeout = u->timeout * HZ; |
1174 | svc->netmask = u->netmask; | 1153 | svc->netmask = u->netmask; |
1154 | svc->net = net; | ||
1175 | 1155 | ||
1176 | INIT_LIST_HEAD(&svc->destinations); | 1156 | INIT_LIST_HEAD(&svc->destinations); |
1177 | rwlock_init(&svc->sched_lock); | 1157 | rwlock_init(&svc->sched_lock); |
@@ -1189,15 +1169,15 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u, | |||
1189 | 1169 | ||
1190 | /* Update the virtual service counters */ | 1170 | /* Update the virtual service counters */ |
1191 | if (svc->port == FTPPORT) | 1171 | if (svc->port == FTPPORT) |
1192 | atomic_inc(&ip_vs_ftpsvc_counter); | 1172 | atomic_inc(&ipvs->ftpsvc_counter); |
1193 | else if (svc->port == 0) | 1173 | else if (svc->port == 0) |
1194 | atomic_inc(&ip_vs_nullsvc_counter); | 1174 | atomic_inc(&ipvs->nullsvc_counter); |
1195 | 1175 | ||
1196 | ip_vs_new_estimator(&svc->stats); | 1176 | ip_vs_new_estimator(net, &svc->stats); |
1197 | 1177 | ||
1198 | /* Count only IPv4 services for old get/setsockopt interface */ | 1178 | /* Count only IPv4 services for old get/setsockopt interface */ |
1199 | if (svc->af == AF_INET) | 1179 | if (svc->af == AF_INET) |
1200 | ip_vs_num_services++; | 1180 | ipvs->num_services++; |
1201 | 1181 | ||
1202 | /* Hash the service into the service table */ | 1182 | /* Hash the service into the service table */ |
1203 | write_lock_bh(&__ip_vs_svc_lock); | 1183 | write_lock_bh(&__ip_vs_svc_lock); |
@@ -1207,6 +1187,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u, | |||
1207 | *svc_p = svc; | 1187 | *svc_p = svc; |
1208 | return 0; | 1188 | return 0; |
1209 | 1189 | ||
1190 | |||
1210 | out_err: | 1191 | out_err: |
1211 | if (svc != NULL) { | 1192 | if (svc != NULL) { |
1212 | ip_vs_unbind_scheduler(svc); | 1193 | ip_vs_unbind_scheduler(svc); |
@@ -1215,6 +1196,8 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u, | |||
1215 | ip_vs_app_inc_put(svc->inc); | 1196 | ip_vs_app_inc_put(svc->inc); |
1216 | local_bh_enable(); | 1197 | local_bh_enable(); |
1217 | } | 1198 | } |
1199 | if (svc->stats.cpustats) | ||
1200 | free_percpu(svc->stats.cpustats); | ||
1218 | kfree(svc); | 1201 | kfree(svc); |
1219 | } | 1202 | } |
1220 | ip_vs_scheduler_put(sched); | 1203 | ip_vs_scheduler_put(sched); |
@@ -1248,7 +1231,7 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) | |||
1248 | old_sched = sched; | 1231 | old_sched = sched; |
1249 | 1232 | ||
1250 | if (u->pe_name && *u->pe_name) { | 1233 | if (u->pe_name && *u->pe_name) { |
1251 | pe = ip_vs_pe_get(u->pe_name); | 1234 | pe = ip_vs_pe_getbyname(u->pe_name); |
1252 | if (pe == NULL) { | 1235 | if (pe == NULL) { |
1253 | pr_info("persistence engine module ip_vs_pe_%s " | 1236 | pr_info("persistence engine module ip_vs_pe_%s " |
1254 | "not found\n", u->pe_name); | 1237 | "not found\n", u->pe_name); |
@@ -1334,14 +1317,15 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) | |||
1334 | struct ip_vs_dest *dest, *nxt; | 1317 | struct ip_vs_dest *dest, *nxt; |
1335 | struct ip_vs_scheduler *old_sched; | 1318 | struct ip_vs_scheduler *old_sched; |
1336 | struct ip_vs_pe *old_pe; | 1319 | struct ip_vs_pe *old_pe; |
1320 | struct netns_ipvs *ipvs = net_ipvs(svc->net); | ||
1337 | 1321 | ||
1338 | pr_info("%s: enter\n", __func__); | 1322 | pr_info("%s: enter\n", __func__); |
1339 | 1323 | ||
1340 | /* Count only IPv4 services for old get/setsockopt interface */ | 1324 | /* Count only IPv4 services for old get/setsockopt interface */ |
1341 | if (svc->af == AF_INET) | 1325 | if (svc->af == AF_INET) |
1342 | ip_vs_num_services--; | 1326 | ipvs->num_services--; |
1343 | 1327 | ||
1344 | ip_vs_kill_estimator(&svc->stats); | 1328 | ip_vs_kill_estimator(svc->net, &svc->stats); |
1345 | 1329 | ||
1346 | /* Unbind scheduler */ | 1330 | /* Unbind scheduler */ |
1347 | old_sched = svc->scheduler; | 1331 | old_sched = svc->scheduler; |
@@ -1364,16 +1348,16 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) | |||
1364 | */ | 1348 | */ |
1365 | list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) { | 1349 | list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) { |
1366 | __ip_vs_unlink_dest(svc, dest, 0); | 1350 | __ip_vs_unlink_dest(svc, dest, 0); |
1367 | __ip_vs_del_dest(dest); | 1351 | __ip_vs_del_dest(svc->net, dest); |
1368 | } | 1352 | } |
1369 | 1353 | ||
1370 | /* | 1354 | /* |
1371 | * Update the virtual service counters | 1355 | * Update the virtual service counters |
1372 | */ | 1356 | */ |
1373 | if (svc->port == FTPPORT) | 1357 | if (svc->port == FTPPORT) |
1374 | atomic_dec(&ip_vs_ftpsvc_counter); | 1358 | atomic_dec(&ipvs->ftpsvc_counter); |
1375 | else if (svc->port == 0) | 1359 | else if (svc->port == 0) |
1376 | atomic_dec(&ip_vs_nullsvc_counter); | 1360 | atomic_dec(&ipvs->nullsvc_counter); |
1377 | 1361 | ||
1378 | /* | 1362 | /* |
1379 | * Free the service if nobody refers to it | 1363 | * Free the service if nobody refers to it |
@@ -1383,6 +1367,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) | |||
1383 | svc->fwmark, | 1367 | svc->fwmark, |
1384 | IP_VS_DBG_ADDR(svc->af, &svc->addr), | 1368 | IP_VS_DBG_ADDR(svc->af, &svc->addr), |
1385 | ntohs(svc->port), atomic_read(&svc->usecnt)); | 1369 | ntohs(svc->port), atomic_read(&svc->usecnt)); |
1370 | free_percpu(svc->stats.cpustats); | ||
1386 | kfree(svc); | 1371 | kfree(svc); |
1387 | } | 1372 | } |
1388 | 1373 | ||
@@ -1428,17 +1413,19 @@ static int ip_vs_del_service(struct ip_vs_service *svc) | |||
1428 | /* | 1413 | /* |
1429 | * Flush all the virtual services | 1414 | * Flush all the virtual services |
1430 | */ | 1415 | */ |
1431 | static int ip_vs_flush(void) | 1416 | static int ip_vs_flush(struct net *net) |
1432 | { | 1417 | { |
1433 | int idx; | 1418 | int idx; |
1434 | struct ip_vs_service *svc, *nxt; | 1419 | struct ip_vs_service *svc, *nxt; |
1435 | 1420 | ||
1436 | /* | 1421 | /* |
1437 | * Flush the service table hashed by <protocol,addr,port> | 1422 | * Flush the service table hashed by <netns,protocol,addr,port> |
1438 | */ | 1423 | */ |
1439 | for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { | 1424 | for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { |
1440 | list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) { | 1425 | list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], |
1441 | ip_vs_unlink_service(svc); | 1426 | s_list) { |
1427 | if (net_eq(svc->net, net)) | ||
1428 | ip_vs_unlink_service(svc); | ||
1442 | } | 1429 | } |
1443 | } | 1430 | } |
1444 | 1431 | ||
@@ -1448,7 +1435,8 @@ static int ip_vs_flush(void) | |||
1448 | for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { | 1435 | for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { |
1449 | list_for_each_entry_safe(svc, nxt, | 1436 | list_for_each_entry_safe(svc, nxt, |
1450 | &ip_vs_svc_fwm_table[idx], f_list) { | 1437 | &ip_vs_svc_fwm_table[idx], f_list) { |
1451 | ip_vs_unlink_service(svc); | 1438 | if (net_eq(svc->net, net)) |
1439 | ip_vs_unlink_service(svc); | ||
1452 | } | 1440 | } |
1453 | } | 1441 | } |
1454 | 1442 | ||
@@ -1472,24 +1460,26 @@ static int ip_vs_zero_service(struct ip_vs_service *svc) | |||
1472 | return 0; | 1460 | return 0; |
1473 | } | 1461 | } |
1474 | 1462 | ||
1475 | static int ip_vs_zero_all(void) | 1463 | static int ip_vs_zero_all(struct net *net) |
1476 | { | 1464 | { |
1477 | int idx; | 1465 | int idx; |
1478 | struct ip_vs_service *svc; | 1466 | struct ip_vs_service *svc; |
1479 | 1467 | ||
1480 | for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { | 1468 | for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { |
1481 | list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { | 1469 | list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { |
1482 | ip_vs_zero_service(svc); | 1470 | if (net_eq(svc->net, net)) |
1471 | ip_vs_zero_service(svc); | ||
1483 | } | 1472 | } |
1484 | } | 1473 | } |
1485 | 1474 | ||
1486 | for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { | 1475 | for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { |
1487 | list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { | 1476 | list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { |
1488 | ip_vs_zero_service(svc); | 1477 | if (net_eq(svc->net, net)) |
1478 | ip_vs_zero_service(svc); | ||
1489 | } | 1479 | } |
1490 | } | 1480 | } |
1491 | 1481 | ||
1492 | ip_vs_zero_stats(&ip_vs_stats); | 1482 | ip_vs_zero_stats(net_ipvs(net)->tot_stats); |
1493 | return 0; | 1483 | return 0; |
1494 | } | 1484 | } |
1495 | 1485 | ||
@@ -1498,6 +1488,7 @@ static int | |||
1498 | proc_do_defense_mode(ctl_table *table, int write, | 1488 | proc_do_defense_mode(ctl_table *table, int write, |
1499 | void __user *buffer, size_t *lenp, loff_t *ppos) | 1489 | void __user *buffer, size_t *lenp, loff_t *ppos) |
1500 | { | 1490 | { |
1491 | struct net *net = current->nsproxy->net_ns; | ||
1501 | int *valp = table->data; | 1492 | int *valp = table->data; |
1502 | int val = *valp; | 1493 | int val = *valp; |
1503 | int rc; | 1494 | int rc; |
@@ -1508,7 +1499,7 @@ proc_do_defense_mode(ctl_table *table, int write, | |||
1508 | /* Restore the correct value */ | 1499 | /* Restore the correct value */ |
1509 | *valp = val; | 1500 | *valp = val; |
1510 | } else { | 1501 | } else { |
1511 | update_defense_level(); | 1502 | update_defense_level(net_ipvs(net)); |
1512 | } | 1503 | } |
1513 | } | 1504 | } |
1514 | return rc; | 1505 | return rc; |
@@ -1534,45 +1525,54 @@ proc_do_sync_threshold(ctl_table *table, int write, | |||
1534 | return rc; | 1525 | return rc; |
1535 | } | 1526 | } |
1536 | 1527 | ||
1528 | static int | ||
1529 | proc_do_sync_mode(ctl_table *table, int write, | ||
1530 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
1531 | { | ||
1532 | int *valp = table->data; | ||
1533 | int val = *valp; | ||
1534 | int rc; | ||
1535 | |||
1536 | rc = proc_dointvec(table, write, buffer, lenp, ppos); | ||
1537 | if (write && (*valp != val)) { | ||
1538 | if ((*valp < 0) || (*valp > 1)) { | ||
1539 | /* Restore the correct value */ | ||
1540 | *valp = val; | ||
1541 | } else { | ||
1542 | struct net *net = current->nsproxy->net_ns; | ||
1543 | ip_vs_sync_switch_mode(net, val); | ||
1544 | } | ||
1545 | } | ||
1546 | return rc; | ||
1547 | } | ||
1537 | 1548 | ||
1538 | /* | 1549 | /* |
1539 | * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/) | 1550 | * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/) |
1551 | * Do not change order or insert new entries without | ||
1552 | * align with netns init in __ip_vs_control_init() | ||
1540 | */ | 1553 | */ |
1541 | 1554 | ||
1542 | static struct ctl_table vs_vars[] = { | 1555 | static struct ctl_table vs_vars[] = { |
1543 | { | 1556 | { |
1544 | .procname = "amemthresh", | 1557 | .procname = "amemthresh", |
1545 | .data = &sysctl_ip_vs_amemthresh, | ||
1546 | .maxlen = sizeof(int), | 1558 | .maxlen = sizeof(int), |
1547 | .mode = 0644, | 1559 | .mode = 0644, |
1548 | .proc_handler = proc_dointvec, | 1560 | .proc_handler = proc_dointvec, |
1549 | }, | 1561 | }, |
1550 | #ifdef CONFIG_IP_VS_DEBUG | ||
1551 | { | ||
1552 | .procname = "debug_level", | ||
1553 | .data = &sysctl_ip_vs_debug_level, | ||
1554 | .maxlen = sizeof(int), | ||
1555 | .mode = 0644, | ||
1556 | .proc_handler = proc_dointvec, | ||
1557 | }, | ||
1558 | #endif | ||
1559 | { | 1562 | { |
1560 | .procname = "am_droprate", | 1563 | .procname = "am_droprate", |
1561 | .data = &sysctl_ip_vs_am_droprate, | ||
1562 | .maxlen = sizeof(int), | 1564 | .maxlen = sizeof(int), |
1563 | .mode = 0644, | 1565 | .mode = 0644, |
1564 | .proc_handler = proc_dointvec, | 1566 | .proc_handler = proc_dointvec, |
1565 | }, | 1567 | }, |
1566 | { | 1568 | { |
1567 | .procname = "drop_entry", | 1569 | .procname = "drop_entry", |
1568 | .data = &sysctl_ip_vs_drop_entry, | ||
1569 | .maxlen = sizeof(int), | 1570 | .maxlen = sizeof(int), |
1570 | .mode = 0644, | 1571 | .mode = 0644, |
1571 | .proc_handler = proc_do_defense_mode, | 1572 | .proc_handler = proc_do_defense_mode, |
1572 | }, | 1573 | }, |
1573 | { | 1574 | { |
1574 | .procname = "drop_packet", | 1575 | .procname = "drop_packet", |
1575 | .data = &sysctl_ip_vs_drop_packet, | ||
1576 | .maxlen = sizeof(int), | 1576 | .maxlen = sizeof(int), |
1577 | .mode = 0644, | 1577 | .mode = 0644, |
1578 | .proc_handler = proc_do_defense_mode, | 1578 | .proc_handler = proc_do_defense_mode, |
@@ -1580,7 +1580,6 @@ static struct ctl_table vs_vars[] = { | |||
1580 | #ifdef CONFIG_IP_VS_NFCT | 1580 | #ifdef CONFIG_IP_VS_NFCT |
1581 | { | 1581 | { |
1582 | .procname = "conntrack", | 1582 | .procname = "conntrack", |
1583 | .data = &sysctl_ip_vs_conntrack, | ||
1584 | .maxlen = sizeof(int), | 1583 | .maxlen = sizeof(int), |
1585 | .mode = 0644, | 1584 | .mode = 0644, |
1586 | .proc_handler = &proc_dointvec, | 1585 | .proc_handler = &proc_dointvec, |
@@ -1588,18 +1587,62 @@ static struct ctl_table vs_vars[] = { | |||
1588 | #endif | 1587 | #endif |
1589 | { | 1588 | { |
1590 | .procname = "secure_tcp", | 1589 | .procname = "secure_tcp", |
1591 | .data = &sysctl_ip_vs_secure_tcp, | ||
1592 | .maxlen = sizeof(int), | 1590 | .maxlen = sizeof(int), |
1593 | .mode = 0644, | 1591 | .mode = 0644, |
1594 | .proc_handler = proc_do_defense_mode, | 1592 | .proc_handler = proc_do_defense_mode, |
1595 | }, | 1593 | }, |
1596 | { | 1594 | { |
1597 | .procname = "snat_reroute", | 1595 | .procname = "snat_reroute", |
1598 | .data = &sysctl_ip_vs_snat_reroute, | ||
1599 | .maxlen = sizeof(int), | 1596 | .maxlen = sizeof(int), |
1600 | .mode = 0644, | 1597 | .mode = 0644, |
1601 | .proc_handler = &proc_dointvec, | 1598 | .proc_handler = &proc_dointvec, |
1602 | }, | 1599 | }, |
1600 | { | ||
1601 | .procname = "sync_version", | ||
1602 | .maxlen = sizeof(int), | ||
1603 | .mode = 0644, | ||
1604 | .proc_handler = &proc_do_sync_mode, | ||
1605 | }, | ||
1606 | { | ||
1607 | .procname = "cache_bypass", | ||
1608 | .maxlen = sizeof(int), | ||
1609 | .mode = 0644, | ||
1610 | .proc_handler = proc_dointvec, | ||
1611 | }, | ||
1612 | { | ||
1613 | .procname = "expire_nodest_conn", | ||
1614 | .maxlen = sizeof(int), | ||
1615 | .mode = 0644, | ||
1616 | .proc_handler = proc_dointvec, | ||
1617 | }, | ||
1618 | { | ||
1619 | .procname = "expire_quiescent_template", | ||
1620 | .maxlen = sizeof(int), | ||
1621 | .mode = 0644, | ||
1622 | .proc_handler = proc_dointvec, | ||
1623 | }, | ||
1624 | { | ||
1625 | .procname = "sync_threshold", | ||
1626 | .maxlen = | ||
1627 | sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold), | ||
1628 | .mode = 0644, | ||
1629 | .proc_handler = proc_do_sync_threshold, | ||
1630 | }, | ||
1631 | { | ||
1632 | .procname = "nat_icmp_send", | ||
1633 | .maxlen = sizeof(int), | ||
1634 | .mode = 0644, | ||
1635 | .proc_handler = proc_dointvec, | ||
1636 | }, | ||
1637 | #ifdef CONFIG_IP_VS_DEBUG | ||
1638 | { | ||
1639 | .procname = "debug_level", | ||
1640 | .data = &sysctl_ip_vs_debug_level, | ||
1641 | .maxlen = sizeof(int), | ||
1642 | .mode = 0644, | ||
1643 | .proc_handler = proc_dointvec, | ||
1644 | }, | ||
1645 | #endif | ||
1603 | #if 0 | 1646 | #if 0 |
1604 | { | 1647 | { |
1605 | .procname = "timeout_established", | 1648 | .procname = "timeout_established", |
@@ -1686,41 +1729,6 @@ static struct ctl_table vs_vars[] = { | |||
1686 | .proc_handler = proc_dointvec_jiffies, | 1729 | .proc_handler = proc_dointvec_jiffies, |
1687 | }, | 1730 | }, |
1688 | #endif | 1731 | #endif |
1689 | { | ||
1690 | .procname = "cache_bypass", | ||
1691 | .data = &sysctl_ip_vs_cache_bypass, | ||
1692 | .maxlen = sizeof(int), | ||
1693 | .mode = 0644, | ||
1694 | .proc_handler = proc_dointvec, | ||
1695 | }, | ||
1696 | { | ||
1697 | .procname = "expire_nodest_conn", | ||
1698 | .data = &sysctl_ip_vs_expire_nodest_conn, | ||
1699 | .maxlen = sizeof(int), | ||
1700 | .mode = 0644, | ||
1701 | .proc_handler = proc_dointvec, | ||
1702 | }, | ||
1703 | { | ||
1704 | .procname = "expire_quiescent_template", | ||
1705 | .data = &sysctl_ip_vs_expire_quiescent_template, | ||
1706 | .maxlen = sizeof(int), | ||
1707 | .mode = 0644, | ||
1708 | .proc_handler = proc_dointvec, | ||
1709 | }, | ||
1710 | { | ||
1711 | .procname = "sync_threshold", | ||
1712 | .data = &sysctl_ip_vs_sync_threshold, | ||
1713 | .maxlen = sizeof(sysctl_ip_vs_sync_threshold), | ||
1714 | .mode = 0644, | ||
1715 | .proc_handler = proc_do_sync_threshold, | ||
1716 | }, | ||
1717 | { | ||
1718 | .procname = "nat_icmp_send", | ||
1719 | .data = &sysctl_ip_vs_nat_icmp_send, | ||
1720 | .maxlen = sizeof(int), | ||
1721 | .mode = 0644, | ||
1722 | .proc_handler = proc_dointvec, | ||
1723 | }, | ||
1724 | { } | 1732 | { } |
1725 | }; | 1733 | }; |
1726 | 1734 | ||
@@ -1732,11 +1740,10 @@ const struct ctl_path net_vs_ctl_path[] = { | |||
1732 | }; | 1740 | }; |
1733 | EXPORT_SYMBOL_GPL(net_vs_ctl_path); | 1741 | EXPORT_SYMBOL_GPL(net_vs_ctl_path); |
1734 | 1742 | ||
1735 | static struct ctl_table_header * sysctl_header; | ||
1736 | |||
1737 | #ifdef CONFIG_PROC_FS | 1743 | #ifdef CONFIG_PROC_FS |
1738 | 1744 | ||
1739 | struct ip_vs_iter { | 1745 | struct ip_vs_iter { |
1746 | struct seq_net_private p; /* Do not move this, netns depends upon it*/ | ||
1740 | struct list_head *table; | 1747 | struct list_head *table; |
1741 | int bucket; | 1748 | int bucket; |
1742 | }; | 1749 | }; |
@@ -1763,6 +1770,7 @@ static inline const char *ip_vs_fwd_name(unsigned flags) | |||
1763 | /* Get the Nth entry in the two lists */ | 1770 | /* Get the Nth entry in the two lists */ |
1764 | static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) | 1771 | static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) |
1765 | { | 1772 | { |
1773 | struct net *net = seq_file_net(seq); | ||
1766 | struct ip_vs_iter *iter = seq->private; | 1774 | struct ip_vs_iter *iter = seq->private; |
1767 | int idx; | 1775 | int idx; |
1768 | struct ip_vs_service *svc; | 1776 | struct ip_vs_service *svc; |
@@ -1770,7 +1778,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) | |||
1770 | /* look in hash by protocol */ | 1778 | /* look in hash by protocol */ |
1771 | for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { | 1779 | for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { |
1772 | list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { | 1780 | list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { |
1773 | if (pos-- == 0){ | 1781 | if (net_eq(svc->net, net) && pos-- == 0) { |
1774 | iter->table = ip_vs_svc_table; | 1782 | iter->table = ip_vs_svc_table; |
1775 | iter->bucket = idx; | 1783 | iter->bucket = idx; |
1776 | return svc; | 1784 | return svc; |
@@ -1781,7 +1789,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) | |||
1781 | /* keep looking in fwmark */ | 1789 | /* keep looking in fwmark */ |
1782 | for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { | 1790 | for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { |
1783 | list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { | 1791 | list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { |
1784 | if (pos-- == 0) { | 1792 | if (net_eq(svc->net, net) && pos-- == 0) { |
1785 | iter->table = ip_vs_svc_fwm_table; | 1793 | iter->table = ip_vs_svc_fwm_table; |
1786 | iter->bucket = idx; | 1794 | iter->bucket = idx; |
1787 | return svc; | 1795 | return svc; |
@@ -1935,7 +1943,7 @@ static const struct seq_operations ip_vs_info_seq_ops = { | |||
1935 | 1943 | ||
1936 | static int ip_vs_info_open(struct inode *inode, struct file *file) | 1944 | static int ip_vs_info_open(struct inode *inode, struct file *file) |
1937 | { | 1945 | { |
1938 | return seq_open_private(file, &ip_vs_info_seq_ops, | 1946 | return seq_open_net(inode, file, &ip_vs_info_seq_ops, |
1939 | sizeof(struct ip_vs_iter)); | 1947 | sizeof(struct ip_vs_iter)); |
1940 | } | 1948 | } |
1941 | 1949 | ||
@@ -1949,13 +1957,11 @@ static const struct file_operations ip_vs_info_fops = { | |||
1949 | 1957 | ||
1950 | #endif | 1958 | #endif |
1951 | 1959 | ||
1952 | struct ip_vs_stats ip_vs_stats = { | ||
1953 | .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock), | ||
1954 | }; | ||
1955 | |||
1956 | #ifdef CONFIG_PROC_FS | 1960 | #ifdef CONFIG_PROC_FS |
1957 | static int ip_vs_stats_show(struct seq_file *seq, void *v) | 1961 | static int ip_vs_stats_show(struct seq_file *seq, void *v) |
1958 | { | 1962 | { |
1963 | struct net *net = seq_file_single_net(seq); | ||
1964 | struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats; | ||
1959 | 1965 | ||
1960 | /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ | 1966 | /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ |
1961 | seq_puts(seq, | 1967 | seq_puts(seq, |
@@ -1963,29 +1969,29 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v) | |||
1963 | seq_printf(seq, | 1969 | seq_printf(seq, |
1964 | " Conns Packets Packets Bytes Bytes\n"); | 1970 | " Conns Packets Packets Bytes Bytes\n"); |
1965 | 1971 | ||
1966 | spin_lock_bh(&ip_vs_stats.lock); | 1972 | spin_lock_bh(&tot_stats->lock); |
1967 | seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns, | 1973 | seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", tot_stats->ustats.conns, |
1968 | ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts, | 1974 | tot_stats->ustats.inpkts, tot_stats->ustats.outpkts, |
1969 | (unsigned long long) ip_vs_stats.ustats.inbytes, | 1975 | (unsigned long long) tot_stats->ustats.inbytes, |
1970 | (unsigned long long) ip_vs_stats.ustats.outbytes); | 1976 | (unsigned long long) tot_stats->ustats.outbytes); |
1971 | 1977 | ||
1972 | /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ | 1978 | /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ |
1973 | seq_puts(seq, | 1979 | seq_puts(seq, |
1974 | " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); | 1980 | " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); |
1975 | seq_printf(seq,"%8X %8X %8X %16X %16X\n", | 1981 | seq_printf(seq,"%8X %8X %8X %16X %16X\n", |
1976 | ip_vs_stats.ustats.cps, | 1982 | tot_stats->ustats.cps, |
1977 | ip_vs_stats.ustats.inpps, | 1983 | tot_stats->ustats.inpps, |
1978 | ip_vs_stats.ustats.outpps, | 1984 | tot_stats->ustats.outpps, |
1979 | ip_vs_stats.ustats.inbps, | 1985 | tot_stats->ustats.inbps, |
1980 | ip_vs_stats.ustats.outbps); | 1986 | tot_stats->ustats.outbps); |
1981 | spin_unlock_bh(&ip_vs_stats.lock); | 1987 | spin_unlock_bh(&tot_stats->lock); |
1982 | 1988 | ||
1983 | return 0; | 1989 | return 0; |
1984 | } | 1990 | } |
1985 | 1991 | ||
1986 | static int ip_vs_stats_seq_open(struct inode *inode, struct file *file) | 1992 | static int ip_vs_stats_seq_open(struct inode *inode, struct file *file) |
1987 | { | 1993 | { |
1988 | return single_open(file, ip_vs_stats_show, NULL); | 1994 | return single_open_net(inode, file, ip_vs_stats_show); |
1989 | } | 1995 | } |
1990 | 1996 | ||
1991 | static const struct file_operations ip_vs_stats_fops = { | 1997 | static const struct file_operations ip_vs_stats_fops = { |
@@ -1996,13 +2002,68 @@ static const struct file_operations ip_vs_stats_fops = { | |||
1996 | .release = single_release, | 2002 | .release = single_release, |
1997 | }; | 2003 | }; |
1998 | 2004 | ||
2005 | static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) | ||
2006 | { | ||
2007 | struct net *net = seq_file_single_net(seq); | ||
2008 | struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats; | ||
2009 | int i; | ||
2010 | |||
2011 | /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ | ||
2012 | seq_puts(seq, | ||
2013 | " Total Incoming Outgoing Incoming Outgoing\n"); | ||
2014 | seq_printf(seq, | ||
2015 | "CPU Conns Packets Packets Bytes Bytes\n"); | ||
2016 | |||
2017 | for_each_possible_cpu(i) { | ||
2018 | struct ip_vs_cpu_stats *u = per_cpu_ptr(net->ipvs->cpustats, i); | ||
2019 | seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n", | ||
2020 | i, u->ustats.conns, u->ustats.inpkts, | ||
2021 | u->ustats.outpkts, (__u64)u->ustats.inbytes, | ||
2022 | (__u64)u->ustats.outbytes); | ||
2023 | } | ||
2024 | |||
2025 | spin_lock_bh(&tot_stats->lock); | ||
2026 | seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n", | ||
2027 | tot_stats->ustats.conns, tot_stats->ustats.inpkts, | ||
2028 | tot_stats->ustats.outpkts, | ||
2029 | (unsigned long long) tot_stats->ustats.inbytes, | ||
2030 | (unsigned long long) tot_stats->ustats.outbytes); | ||
2031 | |||
2032 | /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ | ||
2033 | seq_puts(seq, | ||
2034 | " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); | ||
2035 | seq_printf(seq, " %8X %8X %8X %16X %16X\n", | ||
2036 | tot_stats->ustats.cps, | ||
2037 | tot_stats->ustats.inpps, | ||
2038 | tot_stats->ustats.outpps, | ||
2039 | tot_stats->ustats.inbps, | ||
2040 | tot_stats->ustats.outbps); | ||
2041 | spin_unlock_bh(&tot_stats->lock); | ||
2042 | |||
2043 | return 0; | ||
2044 | } | ||
2045 | |||
2046 | static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file) | ||
2047 | { | ||
2048 | return single_open_net(inode, file, ip_vs_stats_percpu_show); | ||
2049 | } | ||
2050 | |||
2051 | static const struct file_operations ip_vs_stats_percpu_fops = { | ||
2052 | .owner = THIS_MODULE, | ||
2053 | .open = ip_vs_stats_percpu_seq_open, | ||
2054 | .read = seq_read, | ||
2055 | .llseek = seq_lseek, | ||
2056 | .release = single_release, | ||
2057 | }; | ||
1999 | #endif | 2058 | #endif |
2000 | 2059 | ||
2001 | /* | 2060 | /* |
2002 | * Set timeout values for tcp tcpfin udp in the timeout_table. | 2061 | * Set timeout values for tcp tcpfin udp in the timeout_table. |
2003 | */ | 2062 | */ |
2004 | static int ip_vs_set_timeout(struct ip_vs_timeout_user *u) | 2063 | static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u) |
2005 | { | 2064 | { |
2065 | struct ip_vs_proto_data *pd; | ||
2066 | |||
2006 | IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n", | 2067 | IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n", |
2007 | u->tcp_timeout, | 2068 | u->tcp_timeout, |
2008 | u->tcp_fin_timeout, | 2069 | u->tcp_fin_timeout, |
@@ -2010,19 +2071,22 @@ static int ip_vs_set_timeout(struct ip_vs_timeout_user *u) | |||
2010 | 2071 | ||
2011 | #ifdef CONFIG_IP_VS_PROTO_TCP | 2072 | #ifdef CONFIG_IP_VS_PROTO_TCP |
2012 | if (u->tcp_timeout) { | 2073 | if (u->tcp_timeout) { |
2013 | ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] | 2074 | pd = ip_vs_proto_data_get(net, IPPROTO_TCP); |
2075 | pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] | ||
2014 | = u->tcp_timeout * HZ; | 2076 | = u->tcp_timeout * HZ; |
2015 | } | 2077 | } |
2016 | 2078 | ||
2017 | if (u->tcp_fin_timeout) { | 2079 | if (u->tcp_fin_timeout) { |
2018 | ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] | 2080 | pd = ip_vs_proto_data_get(net, IPPROTO_TCP); |
2081 | pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] | ||
2019 | = u->tcp_fin_timeout * HZ; | 2082 | = u->tcp_fin_timeout * HZ; |
2020 | } | 2083 | } |
2021 | #endif | 2084 | #endif |
2022 | 2085 | ||
2023 | #ifdef CONFIG_IP_VS_PROTO_UDP | 2086 | #ifdef CONFIG_IP_VS_PROTO_UDP |
2024 | if (u->udp_timeout) { | 2087 | if (u->udp_timeout) { |
2025 | ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] | 2088 | pd = ip_vs_proto_data_get(net, IPPROTO_UDP); |
2089 | pd->timeout_table[IP_VS_UDP_S_NORMAL] | ||
2026 | = u->udp_timeout * HZ; | 2090 | = u->udp_timeout * HZ; |
2027 | } | 2091 | } |
2028 | #endif | 2092 | #endif |
@@ -2087,6 +2151,7 @@ static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest, | |||
2087 | static int | 2151 | static int |
2088 | do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) | 2152 | do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) |
2089 | { | 2153 | { |
2154 | struct net *net = sock_net(sk); | ||
2090 | int ret; | 2155 | int ret; |
2091 | unsigned char arg[MAX_ARG_LEN]; | 2156 | unsigned char arg[MAX_ARG_LEN]; |
2092 | struct ip_vs_service_user *usvc_compat; | 2157 | struct ip_vs_service_user *usvc_compat; |
@@ -2121,19 +2186,20 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) | |||
2121 | 2186 | ||
2122 | if (cmd == IP_VS_SO_SET_FLUSH) { | 2187 | if (cmd == IP_VS_SO_SET_FLUSH) { |
2123 | /* Flush the virtual service */ | 2188 | /* Flush the virtual service */ |
2124 | ret = ip_vs_flush(); | 2189 | ret = ip_vs_flush(net); |
2125 | goto out_unlock; | 2190 | goto out_unlock; |
2126 | } else if (cmd == IP_VS_SO_SET_TIMEOUT) { | 2191 | } else if (cmd == IP_VS_SO_SET_TIMEOUT) { |
2127 | /* Set timeout values for (tcp tcpfin udp) */ | 2192 | /* Set timeout values for (tcp tcpfin udp) */ |
2128 | ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg); | 2193 | ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg); |
2129 | goto out_unlock; | 2194 | goto out_unlock; |
2130 | } else if (cmd == IP_VS_SO_SET_STARTDAEMON) { | 2195 | } else if (cmd == IP_VS_SO_SET_STARTDAEMON) { |
2131 | struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; | 2196 | struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; |
2132 | ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid); | 2197 | ret = start_sync_thread(net, dm->state, dm->mcast_ifn, |
2198 | dm->syncid); | ||
2133 | goto out_unlock; | 2199 | goto out_unlock; |
2134 | } else if (cmd == IP_VS_SO_SET_STOPDAEMON) { | 2200 | } else if (cmd == IP_VS_SO_SET_STOPDAEMON) { |
2135 | struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; | 2201 | struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; |
2136 | ret = stop_sync_thread(dm->state); | 2202 | ret = stop_sync_thread(net, dm->state); |
2137 | goto out_unlock; | 2203 | goto out_unlock; |
2138 | } | 2204 | } |
2139 | 2205 | ||
@@ -2148,7 +2214,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) | |||
2148 | if (cmd == IP_VS_SO_SET_ZERO) { | 2214 | if (cmd == IP_VS_SO_SET_ZERO) { |
2149 | /* if no service address is set, zero counters in all */ | 2215 | /* if no service address is set, zero counters in all */ |
2150 | if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) { | 2216 | if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) { |
2151 | ret = ip_vs_zero_all(); | 2217 | ret = ip_vs_zero_all(net); |
2152 | goto out_unlock; | 2218 | goto out_unlock; |
2153 | } | 2219 | } |
2154 | } | 2220 | } |
@@ -2165,10 +2231,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) | |||
2165 | 2231 | ||
2166 | /* Lookup the exact service by <protocol, addr, port> or fwmark */ | 2232 | /* Lookup the exact service by <protocol, addr, port> or fwmark */ |
2167 | if (usvc.fwmark == 0) | 2233 | if (usvc.fwmark == 0) |
2168 | svc = __ip_vs_service_find(usvc.af, usvc.protocol, | 2234 | svc = __ip_vs_service_find(net, usvc.af, usvc.protocol, |
2169 | &usvc.addr, usvc.port); | 2235 | &usvc.addr, usvc.port); |
2170 | else | 2236 | else |
2171 | svc = __ip_vs_svc_fwm_find(usvc.af, usvc.fwmark); | 2237 | svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark); |
2172 | 2238 | ||
2173 | if (cmd != IP_VS_SO_SET_ADD | 2239 | if (cmd != IP_VS_SO_SET_ADD |
2174 | && (svc == NULL || svc->protocol != usvc.protocol)) { | 2240 | && (svc == NULL || svc->protocol != usvc.protocol)) { |
@@ -2181,7 +2247,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) | |||
2181 | if (svc != NULL) | 2247 | if (svc != NULL) |
2182 | ret = -EEXIST; | 2248 | ret = -EEXIST; |
2183 | else | 2249 | else |
2184 | ret = ip_vs_add_service(&usvc, &svc); | 2250 | ret = ip_vs_add_service(net, &usvc, &svc); |
2185 | break; | 2251 | break; |
2186 | case IP_VS_SO_SET_EDIT: | 2252 | case IP_VS_SO_SET_EDIT: |
2187 | ret = ip_vs_edit_service(svc, &usvc); | 2253 | ret = ip_vs_edit_service(svc, &usvc); |
@@ -2241,7 +2307,8 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) | |||
2241 | } | 2307 | } |
2242 | 2308 | ||
2243 | static inline int | 2309 | static inline int |
2244 | __ip_vs_get_service_entries(const struct ip_vs_get_services *get, | 2310 | __ip_vs_get_service_entries(struct net *net, |
2311 | const struct ip_vs_get_services *get, | ||
2245 | struct ip_vs_get_services __user *uptr) | 2312 | struct ip_vs_get_services __user *uptr) |
2246 | { | 2313 | { |
2247 | int idx, count=0; | 2314 | int idx, count=0; |
@@ -2252,7 +2319,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get, | |||
2252 | for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { | 2319 | for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { |
2253 | list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { | 2320 | list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { |
2254 | /* Only expose IPv4 entries to old interface */ | 2321 | /* Only expose IPv4 entries to old interface */ |
2255 | if (svc->af != AF_INET) | 2322 | if (svc->af != AF_INET || !net_eq(svc->net, net)) |
2256 | continue; | 2323 | continue; |
2257 | 2324 | ||
2258 | if (count >= get->num_services) | 2325 | if (count >= get->num_services) |
@@ -2271,7 +2338,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get, | |||
2271 | for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { | 2338 | for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { |
2272 | list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { | 2339 | list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { |
2273 | /* Only expose IPv4 entries to old interface */ | 2340 | /* Only expose IPv4 entries to old interface */ |
2274 | if (svc->af != AF_INET) | 2341 | if (svc->af != AF_INET || !net_eq(svc->net, net)) |
2275 | continue; | 2342 | continue; |
2276 | 2343 | ||
2277 | if (count >= get->num_services) | 2344 | if (count >= get->num_services) |
@@ -2291,7 +2358,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get, | |||
2291 | } | 2358 | } |
2292 | 2359 | ||
2293 | static inline int | 2360 | static inline int |
2294 | __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get, | 2361 | __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get, |
2295 | struct ip_vs_get_dests __user *uptr) | 2362 | struct ip_vs_get_dests __user *uptr) |
2296 | { | 2363 | { |
2297 | struct ip_vs_service *svc; | 2364 | struct ip_vs_service *svc; |
@@ -2299,9 +2366,9 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get, | |||
2299 | int ret = 0; | 2366 | int ret = 0; |
2300 | 2367 | ||
2301 | if (get->fwmark) | 2368 | if (get->fwmark) |
2302 | svc = __ip_vs_svc_fwm_find(AF_INET, get->fwmark); | 2369 | svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark); |
2303 | else | 2370 | else |
2304 | svc = __ip_vs_service_find(AF_INET, get->protocol, &addr, | 2371 | svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr, |
2305 | get->port); | 2372 | get->port); |
2306 | 2373 | ||
2307 | if (svc) { | 2374 | if (svc) { |
@@ -2336,17 +2403,19 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get, | |||
2336 | } | 2403 | } |
2337 | 2404 | ||
2338 | static inline void | 2405 | static inline void |
2339 | __ip_vs_get_timeouts(struct ip_vs_timeout_user *u) | 2406 | __ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u) |
2340 | { | 2407 | { |
2408 | struct ip_vs_proto_data *pd; | ||
2409 | |||
2341 | #ifdef CONFIG_IP_VS_PROTO_TCP | 2410 | #ifdef CONFIG_IP_VS_PROTO_TCP |
2342 | u->tcp_timeout = | 2411 | pd = ip_vs_proto_data_get(net, IPPROTO_TCP); |
2343 | ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ; | 2412 | u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ; |
2344 | u->tcp_fin_timeout = | 2413 | u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ; |
2345 | ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ; | ||
2346 | #endif | 2414 | #endif |
2347 | #ifdef CONFIG_IP_VS_PROTO_UDP | 2415 | #ifdef CONFIG_IP_VS_PROTO_UDP |
2416 | pd = ip_vs_proto_data_get(net, IPPROTO_UDP); | ||
2348 | u->udp_timeout = | 2417 | u->udp_timeout = |
2349 | ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ; | 2418 | pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ; |
2350 | #endif | 2419 | #endif |
2351 | } | 2420 | } |
2352 | 2421 | ||
@@ -2375,7 +2444,10 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) | |||
2375 | unsigned char arg[128]; | 2444 | unsigned char arg[128]; |
2376 | int ret = 0; | 2445 | int ret = 0; |
2377 | unsigned int copylen; | 2446 | unsigned int copylen; |
2447 | struct net *net = sock_net(sk); | ||
2448 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
2378 | 2449 | ||
2450 | BUG_ON(!net); | ||
2379 | if (!capable(CAP_NET_ADMIN)) | 2451 | if (!capable(CAP_NET_ADMIN)) |
2380 | return -EPERM; | 2452 | return -EPERM; |
2381 | 2453 | ||
@@ -2418,7 +2490,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) | |||
2418 | struct ip_vs_getinfo info; | 2490 | struct ip_vs_getinfo info; |
2419 | info.version = IP_VS_VERSION_CODE; | 2491 | info.version = IP_VS_VERSION_CODE; |
2420 | info.size = ip_vs_conn_tab_size; | 2492 | info.size = ip_vs_conn_tab_size; |
2421 | info.num_services = ip_vs_num_services; | 2493 | info.num_services = ipvs->num_services; |
2422 | if (copy_to_user(user, &info, sizeof(info)) != 0) | 2494 | if (copy_to_user(user, &info, sizeof(info)) != 0) |
2423 | ret = -EFAULT; | 2495 | ret = -EFAULT; |
2424 | } | 2496 | } |
@@ -2437,7 +2509,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) | |||
2437 | ret = -EINVAL; | 2509 | ret = -EINVAL; |
2438 | goto out; | 2510 | goto out; |
2439 | } | 2511 | } |
2440 | ret = __ip_vs_get_service_entries(get, user); | 2512 | ret = __ip_vs_get_service_entries(net, get, user); |
2441 | } | 2513 | } |
2442 | break; | 2514 | break; |
2443 | 2515 | ||
@@ -2450,10 +2522,11 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) | |||
2450 | entry = (struct ip_vs_service_entry *)arg; | 2522 | entry = (struct ip_vs_service_entry *)arg; |
2451 | addr.ip = entry->addr; | 2523 | addr.ip = entry->addr; |
2452 | if (entry->fwmark) | 2524 | if (entry->fwmark) |
2453 | svc = __ip_vs_svc_fwm_find(AF_INET, entry->fwmark); | 2525 | svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark); |
2454 | else | 2526 | else |
2455 | svc = __ip_vs_service_find(AF_INET, entry->protocol, | 2527 | svc = __ip_vs_service_find(net, AF_INET, |
2456 | &addr, entry->port); | 2528 | entry->protocol, &addr, |
2529 | entry->port); | ||
2457 | if (svc) { | 2530 | if (svc) { |
2458 | ip_vs_copy_service(entry, svc); | 2531 | ip_vs_copy_service(entry, svc); |
2459 | if (copy_to_user(user, entry, sizeof(*entry)) != 0) | 2532 | if (copy_to_user(user, entry, sizeof(*entry)) != 0) |
@@ -2476,7 +2549,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) | |||
2476 | ret = -EINVAL; | 2549 | ret = -EINVAL; |
2477 | goto out; | 2550 | goto out; |
2478 | } | 2551 | } |
2479 | ret = __ip_vs_get_dest_entries(get, user); | 2552 | ret = __ip_vs_get_dest_entries(net, get, user); |
2480 | } | 2553 | } |
2481 | break; | 2554 | break; |
2482 | 2555 | ||
@@ -2484,7 +2557,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) | |||
2484 | { | 2557 | { |
2485 | struct ip_vs_timeout_user t; | 2558 | struct ip_vs_timeout_user t; |
2486 | 2559 | ||
2487 | __ip_vs_get_timeouts(&t); | 2560 | __ip_vs_get_timeouts(net, &t); |
2488 | if (copy_to_user(user, &t, sizeof(t)) != 0) | 2561 | if (copy_to_user(user, &t, sizeof(t)) != 0) |
2489 | ret = -EFAULT; | 2562 | ret = -EFAULT; |
2490 | } | 2563 | } |
@@ -2495,15 +2568,17 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) | |||
2495 | struct ip_vs_daemon_user d[2]; | 2568 | struct ip_vs_daemon_user d[2]; |
2496 | 2569 | ||
2497 | memset(&d, 0, sizeof(d)); | 2570 | memset(&d, 0, sizeof(d)); |
2498 | if (ip_vs_sync_state & IP_VS_STATE_MASTER) { | 2571 | if (ipvs->sync_state & IP_VS_STATE_MASTER) { |
2499 | d[0].state = IP_VS_STATE_MASTER; | 2572 | d[0].state = IP_VS_STATE_MASTER; |
2500 | strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn)); | 2573 | strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn, |
2501 | d[0].syncid = ip_vs_master_syncid; | 2574 | sizeof(d[0].mcast_ifn)); |
2575 | d[0].syncid = ipvs->master_syncid; | ||
2502 | } | 2576 | } |
2503 | if (ip_vs_sync_state & IP_VS_STATE_BACKUP) { | 2577 | if (ipvs->sync_state & IP_VS_STATE_BACKUP) { |
2504 | d[1].state = IP_VS_STATE_BACKUP; | 2578 | d[1].state = IP_VS_STATE_BACKUP; |
2505 | strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn)); | 2579 | strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn, |
2506 | d[1].syncid = ip_vs_backup_syncid; | 2580 | sizeof(d[1].mcast_ifn)); |
2581 | d[1].syncid = ipvs->backup_syncid; | ||
2507 | } | 2582 | } |
2508 | if (copy_to_user(user, &d, sizeof(d)) != 0) | 2583 | if (copy_to_user(user, &d, sizeof(d)) != 0) |
2509 | ret = -EFAULT; | 2584 | ret = -EFAULT; |
@@ -2542,6 +2617,7 @@ static struct genl_family ip_vs_genl_family = { | |||
2542 | .name = IPVS_GENL_NAME, | 2617 | .name = IPVS_GENL_NAME, |
2543 | .version = IPVS_GENL_VERSION, | 2618 | .version = IPVS_GENL_VERSION, |
2544 | .maxattr = IPVS_CMD_MAX, | 2619 | .maxattr = IPVS_CMD_MAX, |
2620 | .netnsok = true, /* Make ipvsadm to work on netns */ | ||
2545 | }; | 2621 | }; |
2546 | 2622 | ||
2547 | /* Policy used for first-level command attributes */ | 2623 | /* Policy used for first-level command attributes */ |
@@ -2696,11 +2772,12 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb, | |||
2696 | int idx = 0, i; | 2772 | int idx = 0, i; |
2697 | int start = cb->args[0]; | 2773 | int start = cb->args[0]; |
2698 | struct ip_vs_service *svc; | 2774 | struct ip_vs_service *svc; |
2775 | struct net *net = skb_sknet(skb); | ||
2699 | 2776 | ||
2700 | mutex_lock(&__ip_vs_mutex); | 2777 | mutex_lock(&__ip_vs_mutex); |
2701 | for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { | 2778 | for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { |
2702 | list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) { | 2779 | list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) { |
2703 | if (++idx <= start) | 2780 | if (++idx <= start || !net_eq(svc->net, net)) |
2704 | continue; | 2781 | continue; |
2705 | if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { | 2782 | if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { |
2706 | idx--; | 2783 | idx--; |
@@ -2711,7 +2788,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb, | |||
2711 | 2788 | ||
2712 | for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { | 2789 | for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { |
2713 | list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) { | 2790 | list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) { |
2714 | if (++idx <= start) | 2791 | if (++idx <= start || !net_eq(svc->net, net)) |
2715 | continue; | 2792 | continue; |
2716 | if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { | 2793 | if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { |
2717 | idx--; | 2794 | idx--; |
@@ -2727,7 +2804,8 @@ nla_put_failure: | |||
2727 | return skb->len; | 2804 | return skb->len; |
2728 | } | 2805 | } |
2729 | 2806 | ||
2730 | static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc, | 2807 | static int ip_vs_genl_parse_service(struct net *net, |
2808 | struct ip_vs_service_user_kern *usvc, | ||
2731 | struct nlattr *nla, int full_entry, | 2809 | struct nlattr *nla, int full_entry, |
2732 | struct ip_vs_service **ret_svc) | 2810 | struct ip_vs_service **ret_svc) |
2733 | { | 2811 | { |
@@ -2770,9 +2848,9 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc, | |||
2770 | } | 2848 | } |
2771 | 2849 | ||
2772 | if (usvc->fwmark) | 2850 | if (usvc->fwmark) |
2773 | svc = __ip_vs_svc_fwm_find(usvc->af, usvc->fwmark); | 2851 | svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark); |
2774 | else | 2852 | else |
2775 | svc = __ip_vs_service_find(usvc->af, usvc->protocol, | 2853 | svc = __ip_vs_service_find(net, usvc->af, usvc->protocol, |
2776 | &usvc->addr, usvc->port); | 2854 | &usvc->addr, usvc->port); |
2777 | *ret_svc = svc; | 2855 | *ret_svc = svc; |
2778 | 2856 | ||
@@ -2809,13 +2887,14 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc, | |||
2809 | return 0; | 2887 | return 0; |
2810 | } | 2888 | } |
2811 | 2889 | ||
2812 | static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla) | 2890 | static struct ip_vs_service *ip_vs_genl_find_service(struct net *net, |
2891 | struct nlattr *nla) | ||
2813 | { | 2892 | { |
2814 | struct ip_vs_service_user_kern usvc; | 2893 | struct ip_vs_service_user_kern usvc; |
2815 | struct ip_vs_service *svc; | 2894 | struct ip_vs_service *svc; |
2816 | int ret; | 2895 | int ret; |
2817 | 2896 | ||
2818 | ret = ip_vs_genl_parse_service(&usvc, nla, 0, &svc); | 2897 | ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc); |
2819 | return ret ? ERR_PTR(ret) : svc; | 2898 | return ret ? ERR_PTR(ret) : svc; |
2820 | } | 2899 | } |
2821 | 2900 | ||
@@ -2883,6 +2962,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb, | |||
2883 | struct ip_vs_service *svc; | 2962 | struct ip_vs_service *svc; |
2884 | struct ip_vs_dest *dest; | 2963 | struct ip_vs_dest *dest; |
2885 | struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1]; | 2964 | struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1]; |
2965 | struct net *net = skb_sknet(skb); | ||
2886 | 2966 | ||
2887 | mutex_lock(&__ip_vs_mutex); | 2967 | mutex_lock(&__ip_vs_mutex); |
2888 | 2968 | ||
@@ -2891,7 +2971,8 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb, | |||
2891 | IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy)) | 2971 | IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy)) |
2892 | goto out_err; | 2972 | goto out_err; |
2893 | 2973 | ||
2894 | svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]); | 2974 | |
2975 | svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]); | ||
2895 | if (IS_ERR(svc) || svc == NULL) | 2976 | if (IS_ERR(svc) || svc == NULL) |
2896 | goto out_err; | 2977 | goto out_err; |
2897 | 2978 | ||
@@ -3005,20 +3086,23 @@ nla_put_failure: | |||
3005 | static int ip_vs_genl_dump_daemons(struct sk_buff *skb, | 3086 | static int ip_vs_genl_dump_daemons(struct sk_buff *skb, |
3006 | struct netlink_callback *cb) | 3087 | struct netlink_callback *cb) |
3007 | { | 3088 | { |
3089 | struct net *net = skb_net(skb); | ||
3090 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
3091 | |||
3008 | mutex_lock(&__ip_vs_mutex); | 3092 | mutex_lock(&__ip_vs_mutex); |
3009 | if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) { | 3093 | if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) { |
3010 | if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER, | 3094 | if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER, |
3011 | ip_vs_master_mcast_ifn, | 3095 | ipvs->master_mcast_ifn, |
3012 | ip_vs_master_syncid, cb) < 0) | 3096 | ipvs->master_syncid, cb) < 0) |
3013 | goto nla_put_failure; | 3097 | goto nla_put_failure; |
3014 | 3098 | ||
3015 | cb->args[0] = 1; | 3099 | cb->args[0] = 1; |
3016 | } | 3100 | } |
3017 | 3101 | ||
3018 | if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) { | 3102 | if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) { |
3019 | if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP, | 3103 | if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP, |
3020 | ip_vs_backup_mcast_ifn, | 3104 | ipvs->backup_mcast_ifn, |
3021 | ip_vs_backup_syncid, cb) < 0) | 3105 | ipvs->backup_syncid, cb) < 0) |
3022 | goto nla_put_failure; | 3106 | goto nla_put_failure; |
3023 | 3107 | ||
3024 | cb->args[1] = 1; | 3108 | cb->args[1] = 1; |
@@ -3030,31 +3114,33 @@ nla_put_failure: | |||
3030 | return skb->len; | 3114 | return skb->len; |
3031 | } | 3115 | } |
3032 | 3116 | ||
3033 | static int ip_vs_genl_new_daemon(struct nlattr **attrs) | 3117 | static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs) |
3034 | { | 3118 | { |
3035 | if (!(attrs[IPVS_DAEMON_ATTR_STATE] && | 3119 | if (!(attrs[IPVS_DAEMON_ATTR_STATE] && |
3036 | attrs[IPVS_DAEMON_ATTR_MCAST_IFN] && | 3120 | attrs[IPVS_DAEMON_ATTR_MCAST_IFN] && |
3037 | attrs[IPVS_DAEMON_ATTR_SYNC_ID])) | 3121 | attrs[IPVS_DAEMON_ATTR_SYNC_ID])) |
3038 | return -EINVAL; | 3122 | return -EINVAL; |
3039 | 3123 | ||
3040 | return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]), | 3124 | return start_sync_thread(net, |
3125 | nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]), | ||
3041 | nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]), | 3126 | nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]), |
3042 | nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID])); | 3127 | nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID])); |
3043 | } | 3128 | } |
3044 | 3129 | ||
3045 | static int ip_vs_genl_del_daemon(struct nlattr **attrs) | 3130 | static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs) |
3046 | { | 3131 | { |
3047 | if (!attrs[IPVS_DAEMON_ATTR_STATE]) | 3132 | if (!attrs[IPVS_DAEMON_ATTR_STATE]) |
3048 | return -EINVAL; | 3133 | return -EINVAL; |
3049 | 3134 | ||
3050 | return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); | 3135 | return stop_sync_thread(net, |
3136 | nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); | ||
3051 | } | 3137 | } |
3052 | 3138 | ||
3053 | static int ip_vs_genl_set_config(struct nlattr **attrs) | 3139 | static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs) |
3054 | { | 3140 | { |
3055 | struct ip_vs_timeout_user t; | 3141 | struct ip_vs_timeout_user t; |
3056 | 3142 | ||
3057 | __ip_vs_get_timeouts(&t); | 3143 | __ip_vs_get_timeouts(net, &t); |
3058 | 3144 | ||
3059 | if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]) | 3145 | if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]) |
3060 | t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]); | 3146 | t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]); |
@@ -3066,7 +3152,7 @@ static int ip_vs_genl_set_config(struct nlattr **attrs) | |||
3066 | if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]) | 3152 | if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]) |
3067 | t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]); | 3153 | t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]); |
3068 | 3154 | ||
3069 | return ip_vs_set_timeout(&t); | 3155 | return ip_vs_set_timeout(net, &t); |
3070 | } | 3156 | } |
3071 | 3157 | ||
3072 | static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) | 3158 | static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) |
@@ -3076,16 +3162,20 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) | |||
3076 | struct ip_vs_dest_user_kern udest; | 3162 | struct ip_vs_dest_user_kern udest; |
3077 | int ret = 0, cmd; | 3163 | int ret = 0, cmd; |
3078 | int need_full_svc = 0, need_full_dest = 0; | 3164 | int need_full_svc = 0, need_full_dest = 0; |
3165 | struct net *net; | ||
3166 | struct netns_ipvs *ipvs; | ||
3079 | 3167 | ||
3168 | net = skb_sknet(skb); | ||
3169 | ipvs = net_ipvs(net); | ||
3080 | cmd = info->genlhdr->cmd; | 3170 | cmd = info->genlhdr->cmd; |
3081 | 3171 | ||
3082 | mutex_lock(&__ip_vs_mutex); | 3172 | mutex_lock(&__ip_vs_mutex); |
3083 | 3173 | ||
3084 | if (cmd == IPVS_CMD_FLUSH) { | 3174 | if (cmd == IPVS_CMD_FLUSH) { |
3085 | ret = ip_vs_flush(); | 3175 | ret = ip_vs_flush(net); |
3086 | goto out; | 3176 | goto out; |
3087 | } else if (cmd == IPVS_CMD_SET_CONFIG) { | 3177 | } else if (cmd == IPVS_CMD_SET_CONFIG) { |
3088 | ret = ip_vs_genl_set_config(info->attrs); | 3178 | ret = ip_vs_genl_set_config(net, info->attrs); |
3089 | goto out; | 3179 | goto out; |
3090 | } else if (cmd == IPVS_CMD_NEW_DAEMON || | 3180 | } else if (cmd == IPVS_CMD_NEW_DAEMON || |
3091 | cmd == IPVS_CMD_DEL_DAEMON) { | 3181 | cmd == IPVS_CMD_DEL_DAEMON) { |
@@ -3101,13 +3191,13 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) | |||
3101 | } | 3191 | } |
3102 | 3192 | ||
3103 | if (cmd == IPVS_CMD_NEW_DAEMON) | 3193 | if (cmd == IPVS_CMD_NEW_DAEMON) |
3104 | ret = ip_vs_genl_new_daemon(daemon_attrs); | 3194 | ret = ip_vs_genl_new_daemon(net, daemon_attrs); |
3105 | else | 3195 | else |
3106 | ret = ip_vs_genl_del_daemon(daemon_attrs); | 3196 | ret = ip_vs_genl_del_daemon(net, daemon_attrs); |
3107 | goto out; | 3197 | goto out; |
3108 | } else if (cmd == IPVS_CMD_ZERO && | 3198 | } else if (cmd == IPVS_CMD_ZERO && |
3109 | !info->attrs[IPVS_CMD_ATTR_SERVICE]) { | 3199 | !info->attrs[IPVS_CMD_ATTR_SERVICE]) { |
3110 | ret = ip_vs_zero_all(); | 3200 | ret = ip_vs_zero_all(net); |
3111 | goto out; | 3201 | goto out; |
3112 | } | 3202 | } |
3113 | 3203 | ||
@@ -3117,7 +3207,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) | |||
3117 | if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE) | 3207 | if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE) |
3118 | need_full_svc = 1; | 3208 | need_full_svc = 1; |
3119 | 3209 | ||
3120 | ret = ip_vs_genl_parse_service(&usvc, | 3210 | ret = ip_vs_genl_parse_service(net, &usvc, |
3121 | info->attrs[IPVS_CMD_ATTR_SERVICE], | 3211 | info->attrs[IPVS_CMD_ATTR_SERVICE], |
3122 | need_full_svc, &svc); | 3212 | need_full_svc, &svc); |
3123 | if (ret) | 3213 | if (ret) |
@@ -3147,7 +3237,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) | |||
3147 | switch (cmd) { | 3237 | switch (cmd) { |
3148 | case IPVS_CMD_NEW_SERVICE: | 3238 | case IPVS_CMD_NEW_SERVICE: |
3149 | if (svc == NULL) | 3239 | if (svc == NULL) |
3150 | ret = ip_vs_add_service(&usvc, &svc); | 3240 | ret = ip_vs_add_service(net, &usvc, &svc); |
3151 | else | 3241 | else |
3152 | ret = -EEXIST; | 3242 | ret = -EEXIST; |
3153 | break; | 3243 | break; |
@@ -3185,7 +3275,11 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) | |||
3185 | struct sk_buff *msg; | 3275 | struct sk_buff *msg; |
3186 | void *reply; | 3276 | void *reply; |
3187 | int ret, cmd, reply_cmd; | 3277 | int ret, cmd, reply_cmd; |
3278 | struct net *net; | ||
3279 | struct netns_ipvs *ipvs; | ||
3188 | 3280 | ||
3281 | net = skb_sknet(skb); | ||
3282 | ipvs = net_ipvs(net); | ||
3189 | cmd = info->genlhdr->cmd; | 3283 | cmd = info->genlhdr->cmd; |
3190 | 3284 | ||
3191 | if (cmd == IPVS_CMD_GET_SERVICE) | 3285 | if (cmd == IPVS_CMD_GET_SERVICE) |
@@ -3214,7 +3308,8 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) | |||
3214 | { | 3308 | { |
3215 | struct ip_vs_service *svc; | 3309 | struct ip_vs_service *svc; |
3216 | 3310 | ||
3217 | svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]); | 3311 | svc = ip_vs_genl_find_service(net, |
3312 | info->attrs[IPVS_CMD_ATTR_SERVICE]); | ||
3218 | if (IS_ERR(svc)) { | 3313 | if (IS_ERR(svc)) { |
3219 | ret = PTR_ERR(svc); | 3314 | ret = PTR_ERR(svc); |
3220 | goto out_err; | 3315 | goto out_err; |
@@ -3234,7 +3329,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) | |||
3234 | { | 3329 | { |
3235 | struct ip_vs_timeout_user t; | 3330 | struct ip_vs_timeout_user t; |
3236 | 3331 | ||
3237 | __ip_vs_get_timeouts(&t); | 3332 | __ip_vs_get_timeouts(net, &t); |
3238 | #ifdef CONFIG_IP_VS_PROTO_TCP | 3333 | #ifdef CONFIG_IP_VS_PROTO_TCP |
3239 | NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout); | 3334 | NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout); |
3240 | NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN, | 3335 | NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN, |
@@ -3380,62 +3475,172 @@ static void ip_vs_genl_unregister(void) | |||
3380 | 3475 | ||
3381 | /* End of Generic Netlink interface definitions */ | 3476 | /* End of Generic Netlink interface definitions */ |
3382 | 3477 | ||
3478 | /* | ||
3479 | * per netns init/exit func. | ||
3480 | */ | ||
3481 | int __net_init __ip_vs_control_init(struct net *net) | ||
3482 | { | ||
3483 | int idx; | ||
3484 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
3485 | struct ctl_table *tbl; | ||
3486 | |||
3487 | atomic_set(&ipvs->dropentry, 0); | ||
3488 | spin_lock_init(&ipvs->dropentry_lock); | ||
3489 | spin_lock_init(&ipvs->droppacket_lock); | ||
3490 | spin_lock_init(&ipvs->securetcp_lock); | ||
3491 | ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock); | ||
3492 | |||
3493 | /* Initialize rs_table */ | ||
3494 | for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) | ||
3495 | INIT_LIST_HEAD(&ipvs->rs_table[idx]); | ||
3496 | |||
3497 | INIT_LIST_HEAD(&ipvs->dest_trash); | ||
3498 | atomic_set(&ipvs->ftpsvc_counter, 0); | ||
3499 | atomic_set(&ipvs->nullsvc_counter, 0); | ||
3500 | |||
3501 | /* procfs stats */ | ||
3502 | ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL); | ||
3503 | if (ipvs->tot_stats == NULL) { | ||
3504 | pr_err("%s(): no memory.\n", __func__); | ||
3505 | return -ENOMEM; | ||
3506 | } | ||
3507 | ipvs->cpustats = alloc_percpu(struct ip_vs_cpu_stats); | ||
3508 | if (!ipvs->cpustats) { | ||
3509 | pr_err("%s() alloc_percpu failed\n", __func__); | ||
3510 | goto err_alloc; | ||
3511 | } | ||
3512 | spin_lock_init(&ipvs->tot_stats->lock); | ||
3513 | |||
3514 | for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) | ||
3515 | INIT_LIST_HEAD(&ipvs->rs_table[idx]); | ||
3516 | |||
3517 | proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops); | ||
3518 | proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops); | ||
3519 | proc_net_fops_create(net, "ip_vs_stats_percpu", 0, | ||
3520 | &ip_vs_stats_percpu_fops); | ||
3521 | |||
3522 | if (!net_eq(net, &init_net)) { | ||
3523 | tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL); | ||
3524 | if (tbl == NULL) | ||
3525 | goto err_dup; | ||
3526 | } else | ||
3527 | tbl = vs_vars; | ||
3528 | /* Initialize sysctl defaults */ | ||
3529 | idx = 0; | ||
3530 | ipvs->sysctl_amemthresh = 1024; | ||
3531 | tbl[idx++].data = &ipvs->sysctl_amemthresh; | ||
3532 | ipvs->sysctl_am_droprate = 10; | ||
3533 | tbl[idx++].data = &ipvs->sysctl_am_droprate; | ||
3534 | tbl[idx++].data = &ipvs->sysctl_drop_entry; | ||
3535 | tbl[idx++].data = &ipvs->sysctl_drop_packet; | ||
3536 | #ifdef CONFIG_IP_VS_NFCT | ||
3537 | tbl[idx++].data = &ipvs->sysctl_conntrack; | ||
3538 | #endif | ||
3539 | tbl[idx++].data = &ipvs->sysctl_secure_tcp; | ||
3540 | ipvs->sysctl_snat_reroute = 1; | ||
3541 | tbl[idx++].data = &ipvs->sysctl_snat_reroute; | ||
3542 | ipvs->sysctl_sync_ver = 1; | ||
3543 | tbl[idx++].data = &ipvs->sysctl_sync_ver; | ||
3544 | tbl[idx++].data = &ipvs->sysctl_cache_bypass; | ||
3545 | tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn; | ||
3546 | tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template; | ||
3547 | ipvs->sysctl_sync_threshold[0] = 3; | ||
3548 | ipvs->sysctl_sync_threshold[1] = 50; | ||
3549 | tbl[idx].data = &ipvs->sysctl_sync_threshold; | ||
3550 | tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold); | ||
3551 | tbl[idx++].data = &ipvs->sysctl_nat_icmp_send; | ||
3552 | |||
3553 | |||
3554 | ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path, | ||
3555 | vs_vars); | ||
3556 | if (ipvs->sysctl_hdr == NULL) | ||
3557 | goto err_reg; | ||
3558 | ip_vs_new_estimator(net, ipvs->tot_stats); | ||
3559 | ipvs->sysctl_tbl = tbl; | ||
3560 | /* Schedule defense work */ | ||
3561 | INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler); | ||
3562 | schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD); | ||
3563 | return 0; | ||
3564 | |||
3565 | err_reg: | ||
3566 | if (!net_eq(net, &init_net)) | ||
3567 | kfree(tbl); | ||
3568 | err_dup: | ||
3569 | free_percpu(ipvs->cpustats); | ||
3570 | err_alloc: | ||
3571 | kfree(ipvs->tot_stats); | ||
3572 | return -ENOMEM; | ||
3573 | } | ||
3574 | |||
3575 | static void __net_exit __ip_vs_control_cleanup(struct net *net) | ||
3576 | { | ||
3577 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
3578 | |||
3579 | ip_vs_trash_cleanup(net); | ||
3580 | ip_vs_kill_estimator(net, ipvs->tot_stats); | ||
3581 | cancel_delayed_work_sync(&ipvs->defense_work); | ||
3582 | cancel_work_sync(&ipvs->defense_work.work); | ||
3583 | unregister_net_sysctl_table(ipvs->sysctl_hdr); | ||
3584 | proc_net_remove(net, "ip_vs_stats_percpu"); | ||
3585 | proc_net_remove(net, "ip_vs_stats"); | ||
3586 | proc_net_remove(net, "ip_vs"); | ||
3587 | free_percpu(ipvs->cpustats); | ||
3588 | kfree(ipvs->tot_stats); | ||
3589 | } | ||
3590 | |||
3591 | static struct pernet_operations ipvs_control_ops = { | ||
3592 | .init = __ip_vs_control_init, | ||
3593 | .exit = __ip_vs_control_cleanup, | ||
3594 | }; | ||
3383 | 3595 | ||
3384 | int __init ip_vs_control_init(void) | 3596 | int __init ip_vs_control_init(void) |
3385 | { | 3597 | { |
3386 | int ret; | ||
3387 | int idx; | 3598 | int idx; |
3599 | int ret; | ||
3388 | 3600 | ||
3389 | EnterFunction(2); | 3601 | EnterFunction(2); |
3390 | 3602 | ||
3391 | /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */ | 3603 | /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */ |
3392 | for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { | 3604 | for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { |
3393 | INIT_LIST_HEAD(&ip_vs_svc_table[idx]); | 3605 | INIT_LIST_HEAD(&ip_vs_svc_table[idx]); |
3394 | INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]); | 3606 | INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]); |
3395 | } | 3607 | } |
3396 | for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) { | 3608 | |
3397 | INIT_LIST_HEAD(&ip_vs_rtable[idx]); | 3609 | ret = register_pernet_subsys(&ipvs_control_ops); |
3610 | if (ret) { | ||
3611 | pr_err("cannot register namespace.\n"); | ||
3612 | goto err; | ||
3398 | } | 3613 | } |
3399 | smp_wmb(); | 3614 | |
3615 | smp_wmb(); /* Do we really need it now ? */ | ||
3400 | 3616 | ||
3401 | ret = nf_register_sockopt(&ip_vs_sockopts); | 3617 | ret = nf_register_sockopt(&ip_vs_sockopts); |
3402 | if (ret) { | 3618 | if (ret) { |
3403 | pr_err("cannot register sockopt.\n"); | 3619 | pr_err("cannot register sockopt.\n"); |
3404 | return ret; | 3620 | goto err_net; |
3405 | } | 3621 | } |
3406 | 3622 | ||
3407 | ret = ip_vs_genl_register(); | 3623 | ret = ip_vs_genl_register(); |
3408 | if (ret) { | 3624 | if (ret) { |
3409 | pr_err("cannot register Generic Netlink interface.\n"); | 3625 | pr_err("cannot register Generic Netlink interface.\n"); |
3410 | nf_unregister_sockopt(&ip_vs_sockopts); | 3626 | nf_unregister_sockopt(&ip_vs_sockopts); |
3411 | return ret; | 3627 | goto err_net; |
3412 | } | 3628 | } |
3413 | 3629 | ||
3414 | proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops); | ||
3415 | proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops); | ||
3416 | |||
3417 | sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars); | ||
3418 | |||
3419 | ip_vs_new_estimator(&ip_vs_stats); | ||
3420 | |||
3421 | /* Hook the defense timer */ | ||
3422 | schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD); | ||
3423 | |||
3424 | LeaveFunction(2); | 3630 | LeaveFunction(2); |
3425 | return 0; | 3631 | return 0; |
3632 | |||
3633 | err_net: | ||
3634 | unregister_pernet_subsys(&ipvs_control_ops); | ||
3635 | err: | ||
3636 | return ret; | ||
3426 | } | 3637 | } |
3427 | 3638 | ||
3428 | 3639 | ||
3429 | void ip_vs_control_cleanup(void) | 3640 | void ip_vs_control_cleanup(void) |
3430 | { | 3641 | { |
3431 | EnterFunction(2); | 3642 | EnterFunction(2); |
3432 | ip_vs_trash_cleanup(); | 3643 | unregister_pernet_subsys(&ipvs_control_ops); |
3433 | cancel_delayed_work_sync(&defense_work); | ||
3434 | cancel_work_sync(&defense_work.work); | ||
3435 | ip_vs_kill_estimator(&ip_vs_stats); | ||
3436 | unregister_sysctl_table(sysctl_header); | ||
3437 | proc_net_remove(&init_net, "ip_vs_stats"); | ||
3438 | proc_net_remove(&init_net, "ip_vs"); | ||
3439 | ip_vs_genl_unregister(); | 3644 | ip_vs_genl_unregister(); |
3440 | nf_unregister_sockopt(&ip_vs_sockopts); | 3645 | nf_unregister_sockopt(&ip_vs_sockopts); |
3441 | LeaveFunction(2); | 3646 | LeaveFunction(2); |
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index ff28801962e0..f560a05c965a 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c | |||
@@ -8,8 +8,12 @@ | |||
8 | * as published by the Free Software Foundation; either version | 8 | * as published by the Free Software Foundation; either version |
9 | * 2 of the License, or (at your option) any later version. | 9 | * 2 of the License, or (at your option) any later version. |
10 | * | 10 | * |
11 | * Changes: | 11 | * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com> |
12 | * | 12 | * Network name space (netns) aware. |
13 | * Global data moved to netns i.e. struct netns_ipvs | ||
14 | * Affected data: est_list and est_lock. | ||
15 | * estimation_timer() runs with timer per netns. | ||
16 | * get_stats() does the per cpu summing. | ||
13 | */ | 17 | */ |
14 | 18 | ||
15 | #define KMSG_COMPONENT "IPVS" | 19 | #define KMSG_COMPONENT "IPVS" |
@@ -48,11 +52,42 @@ | |||
48 | */ | 52 | */ |
49 | 53 | ||
50 | 54 | ||
51 | static void estimation_timer(unsigned long arg); | 55 | /* |
56 | * Make a summary from each cpu | ||
57 | */ | ||
58 | static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum, | ||
59 | struct ip_vs_cpu_stats *stats) | ||
60 | { | ||
61 | int i; | ||
62 | |||
63 | for_each_possible_cpu(i) { | ||
64 | struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i); | ||
65 | unsigned int start; | ||
66 | __u64 inbytes, outbytes; | ||
67 | if (i) { | ||
68 | sum->conns += s->ustats.conns; | ||
69 | sum->inpkts += s->ustats.inpkts; | ||
70 | sum->outpkts += s->ustats.outpkts; | ||
71 | do { | ||
72 | start = u64_stats_fetch_begin_bh(&s->syncp); | ||
73 | inbytes = s->ustats.inbytes; | ||
74 | outbytes = s->ustats.outbytes; | ||
75 | } while (u64_stats_fetch_retry_bh(&s->syncp, start)); | ||
76 | sum->inbytes += inbytes; | ||
77 | sum->outbytes += outbytes; | ||
78 | } else { | ||
79 | sum->conns = s->ustats.conns; | ||
80 | sum->inpkts = s->ustats.inpkts; | ||
81 | sum->outpkts = s->ustats.outpkts; | ||
82 | do { | ||
83 | start = u64_stats_fetch_begin_bh(&s->syncp); | ||
84 | sum->inbytes = s->ustats.inbytes; | ||
85 | sum->outbytes = s->ustats.outbytes; | ||
86 | } while (u64_stats_fetch_retry_bh(&s->syncp, start)); | ||
87 | } | ||
88 | } | ||
89 | } | ||
52 | 90 | ||
53 | static LIST_HEAD(est_list); | ||
54 | static DEFINE_SPINLOCK(est_lock); | ||
55 | static DEFINE_TIMER(est_timer, estimation_timer, 0, 0); | ||
56 | 91 | ||
57 | static void estimation_timer(unsigned long arg) | 92 | static void estimation_timer(unsigned long arg) |
58 | { | 93 | { |
@@ -62,11 +97,16 @@ static void estimation_timer(unsigned long arg) | |||
62 | u32 n_inpkts, n_outpkts; | 97 | u32 n_inpkts, n_outpkts; |
63 | u64 n_inbytes, n_outbytes; | 98 | u64 n_inbytes, n_outbytes; |
64 | u32 rate; | 99 | u32 rate; |
100 | struct net *net = (struct net *)arg; | ||
101 | struct netns_ipvs *ipvs; | ||
65 | 102 | ||
66 | spin_lock(&est_lock); | 103 | ipvs = net_ipvs(net); |
67 | list_for_each_entry(e, &est_list, list) { | 104 | ip_vs_read_cpu_stats(&ipvs->tot_stats->ustats, ipvs->cpustats); |
105 | spin_lock(&ipvs->est_lock); | ||
106 | list_for_each_entry(e, &ipvs->est_list, list) { | ||
68 | s = container_of(e, struct ip_vs_stats, est); | 107 | s = container_of(e, struct ip_vs_stats, est); |
69 | 108 | ||
109 | ip_vs_read_cpu_stats(&s->ustats, s->cpustats); | ||
70 | spin_lock(&s->lock); | 110 | spin_lock(&s->lock); |
71 | n_conns = s->ustats.conns; | 111 | n_conns = s->ustats.conns; |
72 | n_inpkts = s->ustats.inpkts; | 112 | n_inpkts = s->ustats.inpkts; |
@@ -75,38 +115,39 @@ static void estimation_timer(unsigned long arg) | |||
75 | n_outbytes = s->ustats.outbytes; | 115 | n_outbytes = s->ustats.outbytes; |
76 | 116 | ||
77 | /* scaled by 2^10, but divided 2 seconds */ | 117 | /* scaled by 2^10, but divided 2 seconds */ |
78 | rate = (n_conns - e->last_conns)<<9; | 118 | rate = (n_conns - e->last_conns) << 9; |
79 | e->last_conns = n_conns; | 119 | e->last_conns = n_conns; |
80 | e->cps += ((long)rate - (long)e->cps)>>2; | 120 | e->cps += ((long)rate - (long)e->cps) >> 2; |
81 | s->ustats.cps = (e->cps+0x1FF)>>10; | 121 | s->ustats.cps = (e->cps + 0x1FF) >> 10; |
82 | 122 | ||
83 | rate = (n_inpkts - e->last_inpkts)<<9; | 123 | rate = (n_inpkts - e->last_inpkts) << 9; |
84 | e->last_inpkts = n_inpkts; | 124 | e->last_inpkts = n_inpkts; |
85 | e->inpps += ((long)rate - (long)e->inpps)>>2; | 125 | e->inpps += ((long)rate - (long)e->inpps) >> 2; |
86 | s->ustats.inpps = (e->inpps+0x1FF)>>10; | 126 | s->ustats.inpps = (e->inpps + 0x1FF) >> 10; |
87 | 127 | ||
88 | rate = (n_outpkts - e->last_outpkts)<<9; | 128 | rate = (n_outpkts - e->last_outpkts) << 9; |
89 | e->last_outpkts = n_outpkts; | 129 | e->last_outpkts = n_outpkts; |
90 | e->outpps += ((long)rate - (long)e->outpps)>>2; | 130 | e->outpps += ((long)rate - (long)e->outpps) >> 2; |
91 | s->ustats.outpps = (e->outpps+0x1FF)>>10; | 131 | s->ustats.outpps = (e->outpps + 0x1FF) >> 10; |
92 | 132 | ||
93 | rate = (n_inbytes - e->last_inbytes)<<4; | 133 | rate = (n_inbytes - e->last_inbytes) << 4; |
94 | e->last_inbytes = n_inbytes; | 134 | e->last_inbytes = n_inbytes; |
95 | e->inbps += ((long)rate - (long)e->inbps)>>2; | 135 | e->inbps += ((long)rate - (long)e->inbps) >> 2; |
96 | s->ustats.inbps = (e->inbps+0xF)>>5; | 136 | s->ustats.inbps = (e->inbps + 0xF) >> 5; |
97 | 137 | ||
98 | rate = (n_outbytes - e->last_outbytes)<<4; | 138 | rate = (n_outbytes - e->last_outbytes) << 4; |
99 | e->last_outbytes = n_outbytes; | 139 | e->last_outbytes = n_outbytes; |
100 | e->outbps += ((long)rate - (long)e->outbps)>>2; | 140 | e->outbps += ((long)rate - (long)e->outbps) >> 2; |
101 | s->ustats.outbps = (e->outbps+0xF)>>5; | 141 | s->ustats.outbps = (e->outbps + 0xF) >> 5; |
102 | spin_unlock(&s->lock); | 142 | spin_unlock(&s->lock); |
103 | } | 143 | } |
104 | spin_unlock(&est_lock); | 144 | spin_unlock(&ipvs->est_lock); |
105 | mod_timer(&est_timer, jiffies + 2*HZ); | 145 | mod_timer(&ipvs->est_timer, jiffies + 2*HZ); |
106 | } | 146 | } |
107 | 147 | ||
108 | void ip_vs_new_estimator(struct ip_vs_stats *stats) | 148 | void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats) |
109 | { | 149 | { |
150 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
110 | struct ip_vs_estimator *est = &stats->est; | 151 | struct ip_vs_estimator *est = &stats->est; |
111 | 152 | ||
112 | INIT_LIST_HEAD(&est->list); | 153 | INIT_LIST_HEAD(&est->list); |
@@ -126,18 +167,19 @@ void ip_vs_new_estimator(struct ip_vs_stats *stats) | |||
126 | est->last_outbytes = stats->ustats.outbytes; | 167 | est->last_outbytes = stats->ustats.outbytes; |
127 | est->outbps = stats->ustats.outbps<<5; | 168 | est->outbps = stats->ustats.outbps<<5; |
128 | 169 | ||
129 | spin_lock_bh(&est_lock); | 170 | spin_lock_bh(&ipvs->est_lock); |
130 | list_add(&est->list, &est_list); | 171 | list_add(&est->list, &ipvs->est_list); |
131 | spin_unlock_bh(&est_lock); | 172 | spin_unlock_bh(&ipvs->est_lock); |
132 | } | 173 | } |
133 | 174 | ||
134 | void ip_vs_kill_estimator(struct ip_vs_stats *stats) | 175 | void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats) |
135 | { | 176 | { |
177 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
136 | struct ip_vs_estimator *est = &stats->est; | 178 | struct ip_vs_estimator *est = &stats->est; |
137 | 179 | ||
138 | spin_lock_bh(&est_lock); | 180 | spin_lock_bh(&ipvs->est_lock); |
139 | list_del(&est->list); | 181 | list_del(&est->list); |
140 | spin_unlock_bh(&est_lock); | 182 | spin_unlock_bh(&ipvs->est_lock); |
141 | } | 183 | } |
142 | 184 | ||
143 | void ip_vs_zero_estimator(struct ip_vs_stats *stats) | 185 | void ip_vs_zero_estimator(struct ip_vs_stats *stats) |
@@ -157,13 +199,35 @@ void ip_vs_zero_estimator(struct ip_vs_stats *stats) | |||
157 | est->outbps = 0; | 199 | est->outbps = 0; |
158 | } | 200 | } |
159 | 201 | ||
160 | int __init ip_vs_estimator_init(void) | 202 | static int __net_init __ip_vs_estimator_init(struct net *net) |
161 | { | 203 | { |
162 | mod_timer(&est_timer, jiffies + 2 * HZ); | 204 | struct netns_ipvs *ipvs = net_ipvs(net); |
205 | |||
206 | INIT_LIST_HEAD(&ipvs->est_list); | ||
207 | spin_lock_init(&ipvs->est_lock); | ||
208 | setup_timer(&ipvs->est_timer, estimation_timer, (unsigned long)net); | ||
209 | mod_timer(&ipvs->est_timer, jiffies + 2 * HZ); | ||
163 | return 0; | 210 | return 0; |
164 | } | 211 | } |
165 | 212 | ||
213 | static void __net_exit __ip_vs_estimator_exit(struct net *net) | ||
214 | { | ||
215 | del_timer_sync(&net_ipvs(net)->est_timer); | ||
216 | } | ||
217 | static struct pernet_operations ip_vs_app_ops = { | ||
218 | .init = __ip_vs_estimator_init, | ||
219 | .exit = __ip_vs_estimator_exit, | ||
220 | }; | ||
221 | |||
222 | int __init ip_vs_estimator_init(void) | ||
223 | { | ||
224 | int rv; | ||
225 | |||
226 | rv = register_pernet_subsys(&ip_vs_app_ops); | ||
227 | return rv; | ||
228 | } | ||
229 | |||
166 | void ip_vs_estimator_cleanup(void) | 230 | void ip_vs_estimator_cleanup(void) |
167 | { | 231 | { |
168 | del_timer_sync(&est_timer); | 232 | unregister_pernet_subsys(&ip_vs_app_ops); |
169 | } | 233 | } |
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 75455000ad1c..6b5dd6ddaae9 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c | |||
@@ -157,6 +157,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, | |||
157 | int ret = 0; | 157 | int ret = 0; |
158 | enum ip_conntrack_info ctinfo; | 158 | enum ip_conntrack_info ctinfo; |
159 | struct nf_conn *ct; | 159 | struct nf_conn *ct; |
160 | struct net *net; | ||
160 | 161 | ||
161 | #ifdef CONFIG_IP_VS_IPV6 | 162 | #ifdef CONFIG_IP_VS_IPV6 |
162 | /* This application helper doesn't work with IPv6 yet, | 163 | /* This application helper doesn't work with IPv6 yet, |
@@ -197,18 +198,20 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, | |||
197 | */ | 198 | */ |
198 | { | 199 | { |
199 | struct ip_vs_conn_param p; | 200 | struct ip_vs_conn_param p; |
200 | ip_vs_conn_fill_param(AF_INET, iph->protocol, | 201 | ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET, |
201 | &from, port, &cp->caddr, 0, &p); | 202 | iph->protocol, &from, port, |
203 | &cp->caddr, 0, &p); | ||
202 | n_cp = ip_vs_conn_out_get(&p); | 204 | n_cp = ip_vs_conn_out_get(&p); |
203 | } | 205 | } |
204 | if (!n_cp) { | 206 | if (!n_cp) { |
205 | struct ip_vs_conn_param p; | 207 | struct ip_vs_conn_param p; |
206 | ip_vs_conn_fill_param(AF_INET, IPPROTO_TCP, &cp->caddr, | 208 | ip_vs_conn_fill_param(ip_vs_conn_net(cp), |
209 | AF_INET, IPPROTO_TCP, &cp->caddr, | ||
207 | 0, &cp->vaddr, port, &p); | 210 | 0, &cp->vaddr, port, &p); |
208 | n_cp = ip_vs_conn_new(&p, &from, port, | 211 | n_cp = ip_vs_conn_new(&p, &from, port, |
209 | IP_VS_CONN_F_NO_CPORT | | 212 | IP_VS_CONN_F_NO_CPORT | |
210 | IP_VS_CONN_F_NFCT, | 213 | IP_VS_CONN_F_NFCT, |
211 | cp->dest); | 214 | cp->dest, skb->mark); |
212 | if (!n_cp) | 215 | if (!n_cp) |
213 | return 0; | 216 | return 0; |
214 | 217 | ||
@@ -257,8 +260,9 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, | |||
257 | * would be adjusted twice. | 260 | * would be adjusted twice. |
258 | */ | 261 | */ |
259 | 262 | ||
263 | net = skb_net(skb); | ||
260 | cp->app_data = NULL; | 264 | cp->app_data = NULL; |
261 | ip_vs_tcp_conn_listen(n_cp); | 265 | ip_vs_tcp_conn_listen(net, n_cp); |
262 | ip_vs_conn_put(n_cp); | 266 | ip_vs_conn_put(n_cp); |
263 | return ret; | 267 | return ret; |
264 | } | 268 | } |
@@ -287,6 +291,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, | |||
287 | union nf_inet_addr to; | 291 | union nf_inet_addr to; |
288 | __be16 port; | 292 | __be16 port; |
289 | struct ip_vs_conn *n_cp; | 293 | struct ip_vs_conn *n_cp; |
294 | struct net *net; | ||
290 | 295 | ||
291 | #ifdef CONFIG_IP_VS_IPV6 | 296 | #ifdef CONFIG_IP_VS_IPV6 |
292 | /* This application helper doesn't work with IPv6 yet, | 297 | /* This application helper doesn't work with IPv6 yet, |
@@ -358,14 +363,15 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, | |||
358 | 363 | ||
359 | { | 364 | { |
360 | struct ip_vs_conn_param p; | 365 | struct ip_vs_conn_param p; |
361 | ip_vs_conn_fill_param(AF_INET, iph->protocol, &to, port, | 366 | ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET, |
362 | &cp->vaddr, htons(ntohs(cp->vport)-1), | 367 | iph->protocol, &to, port, &cp->vaddr, |
363 | &p); | 368 | htons(ntohs(cp->vport)-1), &p); |
364 | n_cp = ip_vs_conn_in_get(&p); | 369 | n_cp = ip_vs_conn_in_get(&p); |
365 | if (!n_cp) { | 370 | if (!n_cp) { |
366 | n_cp = ip_vs_conn_new(&p, &cp->daddr, | 371 | n_cp = ip_vs_conn_new(&p, &cp->daddr, |
367 | htons(ntohs(cp->dport)-1), | 372 | htons(ntohs(cp->dport)-1), |
368 | IP_VS_CONN_F_NFCT, cp->dest); | 373 | IP_VS_CONN_F_NFCT, cp->dest, |
374 | skb->mark); | ||
369 | if (!n_cp) | 375 | if (!n_cp) |
370 | return 0; | 376 | return 0; |
371 | 377 | ||
@@ -377,7 +383,8 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, | |||
377 | /* | 383 | /* |
378 | * Move tunnel to listen state | 384 | * Move tunnel to listen state |
379 | */ | 385 | */ |
380 | ip_vs_tcp_conn_listen(n_cp); | 386 | net = skb_net(skb); |
387 | ip_vs_tcp_conn_listen(net, n_cp); | ||
381 | ip_vs_conn_put(n_cp); | 388 | ip_vs_conn_put(n_cp); |
382 | 389 | ||
383 | return 1; | 390 | return 1; |
@@ -398,23 +405,22 @@ static struct ip_vs_app ip_vs_ftp = { | |||
398 | .pkt_in = ip_vs_ftp_in, | 405 | .pkt_in = ip_vs_ftp_in, |
399 | }; | 406 | }; |
400 | 407 | ||
401 | |||
402 | /* | 408 | /* |
403 | * ip_vs_ftp initialization | 409 | * per netns ip_vs_ftp initialization |
404 | */ | 410 | */ |
405 | static int __init ip_vs_ftp_init(void) | 411 | static int __net_init __ip_vs_ftp_init(struct net *net) |
406 | { | 412 | { |
407 | int i, ret; | 413 | int i, ret; |
408 | struct ip_vs_app *app = &ip_vs_ftp; | 414 | struct ip_vs_app *app = &ip_vs_ftp; |
409 | 415 | ||
410 | ret = register_ip_vs_app(app); | 416 | ret = register_ip_vs_app(net, app); |
411 | if (ret) | 417 | if (ret) |
412 | return ret; | 418 | return ret; |
413 | 419 | ||
414 | for (i=0; i<IP_VS_APP_MAX_PORTS; i++) { | 420 | for (i=0; i<IP_VS_APP_MAX_PORTS; i++) { |
415 | if (!ports[i]) | 421 | if (!ports[i]) |
416 | continue; | 422 | continue; |
417 | ret = register_ip_vs_app_inc(app, app->protocol, ports[i]); | 423 | ret = register_ip_vs_app_inc(net, app, app->protocol, ports[i]); |
418 | if (ret) | 424 | if (ret) |
419 | break; | 425 | break; |
420 | pr_info("%s: loaded support on port[%d] = %d\n", | 426 | pr_info("%s: loaded support on port[%d] = %d\n", |
@@ -422,18 +428,39 @@ static int __init ip_vs_ftp_init(void) | |||
422 | } | 428 | } |
423 | 429 | ||
424 | if (ret) | 430 | if (ret) |
425 | unregister_ip_vs_app(app); | 431 | unregister_ip_vs_app(net, app); |
426 | 432 | ||
427 | return ret; | 433 | return ret; |
428 | } | 434 | } |
435 | /* | ||
436 | * netns exit | ||
437 | */ | ||
438 | static void __ip_vs_ftp_exit(struct net *net) | ||
439 | { | ||
440 | struct ip_vs_app *app = &ip_vs_ftp; | ||
441 | |||
442 | unregister_ip_vs_app(net, app); | ||
443 | } | ||
444 | |||
445 | static struct pernet_operations ip_vs_ftp_ops = { | ||
446 | .init = __ip_vs_ftp_init, | ||
447 | .exit = __ip_vs_ftp_exit, | ||
448 | }; | ||
429 | 449 | ||
450 | int __init ip_vs_ftp_init(void) | ||
451 | { | ||
452 | int rv; | ||
453 | |||
454 | rv = register_pernet_subsys(&ip_vs_ftp_ops); | ||
455 | return rv; | ||
456 | } | ||
430 | 457 | ||
431 | /* | 458 | /* |
432 | * ip_vs_ftp finish. | 459 | * ip_vs_ftp finish. |
433 | */ | 460 | */ |
434 | static void __exit ip_vs_ftp_exit(void) | 461 | static void __exit ip_vs_ftp_exit(void) |
435 | { | 462 | { |
436 | unregister_ip_vs_app(&ip_vs_ftp); | 463 | unregister_pernet_subsys(&ip_vs_ftp_ops); |
437 | } | 464 | } |
438 | 465 | ||
439 | 466 | ||
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 9323f8944199..d5bec3371871 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c | |||
@@ -70,7 +70,6 @@ | |||
70 | * entries that haven't been touched for a day. | 70 | * entries that haven't been touched for a day. |
71 | */ | 71 | */ |
72 | #define COUNT_FOR_FULL_EXPIRATION 30 | 72 | #define COUNT_FOR_FULL_EXPIRATION 30 |
73 | static int sysctl_ip_vs_lblc_expiration = 24*60*60*HZ; | ||
74 | 73 | ||
75 | 74 | ||
76 | /* | 75 | /* |
@@ -117,7 +116,7 @@ struct ip_vs_lblc_table { | |||
117 | static ctl_table vs_vars_table[] = { | 116 | static ctl_table vs_vars_table[] = { |
118 | { | 117 | { |
119 | .procname = "lblc_expiration", | 118 | .procname = "lblc_expiration", |
120 | .data = &sysctl_ip_vs_lblc_expiration, | 119 | .data = NULL, |
121 | .maxlen = sizeof(int), | 120 | .maxlen = sizeof(int), |
122 | .mode = 0644, | 121 | .mode = 0644, |
123 | .proc_handler = proc_dointvec_jiffies, | 122 | .proc_handler = proc_dointvec_jiffies, |
@@ -125,8 +124,6 @@ static ctl_table vs_vars_table[] = { | |||
125 | { } | 124 | { } |
126 | }; | 125 | }; |
127 | 126 | ||
128 | static struct ctl_table_header * sysctl_header; | ||
129 | |||
130 | static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en) | 127 | static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en) |
131 | { | 128 | { |
132 | list_del(&en->list); | 129 | list_del(&en->list); |
@@ -248,6 +245,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc) | |||
248 | struct ip_vs_lblc_entry *en, *nxt; | 245 | struct ip_vs_lblc_entry *en, *nxt; |
249 | unsigned long now = jiffies; | 246 | unsigned long now = jiffies; |
250 | int i, j; | 247 | int i, j; |
248 | struct netns_ipvs *ipvs = net_ipvs(svc->net); | ||
251 | 249 | ||
252 | for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) { | 250 | for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) { |
253 | j = (j + 1) & IP_VS_LBLC_TAB_MASK; | 251 | j = (j + 1) & IP_VS_LBLC_TAB_MASK; |
@@ -255,7 +253,8 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc) | |||
255 | write_lock(&svc->sched_lock); | 253 | write_lock(&svc->sched_lock); |
256 | list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { | 254 | list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { |
257 | if (time_before(now, | 255 | if (time_before(now, |
258 | en->lastuse + sysctl_ip_vs_lblc_expiration)) | 256 | en->lastuse + |
257 | ipvs->sysctl_lblc_expiration)) | ||
259 | continue; | 258 | continue; |
260 | 259 | ||
261 | ip_vs_lblc_free(en); | 260 | ip_vs_lblc_free(en); |
@@ -543,23 +542,73 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler = | |||
543 | .schedule = ip_vs_lblc_schedule, | 542 | .schedule = ip_vs_lblc_schedule, |
544 | }; | 543 | }; |
545 | 544 | ||
545 | /* | ||
546 | * per netns init. | ||
547 | */ | ||
548 | static int __net_init __ip_vs_lblc_init(struct net *net) | ||
549 | { | ||
550 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
551 | |||
552 | if (!net_eq(net, &init_net)) { | ||
553 | ipvs->lblc_ctl_table = kmemdup(vs_vars_table, | ||
554 | sizeof(vs_vars_table), | ||
555 | GFP_KERNEL); | ||
556 | if (ipvs->lblc_ctl_table == NULL) | ||
557 | goto err_dup; | ||
558 | } else | ||
559 | ipvs->lblc_ctl_table = vs_vars_table; | ||
560 | ipvs->sysctl_lblc_expiration = 24*60*60*HZ; | ||
561 | ipvs->lblc_ctl_table[0].data = &ipvs->sysctl_lblc_expiration; | ||
562 | |||
563 | ipvs->lblc_ctl_header = | ||
564 | register_net_sysctl_table(net, net_vs_ctl_path, | ||
565 | ipvs->lblc_ctl_table); | ||
566 | if (!ipvs->lblc_ctl_header) | ||
567 | goto err_reg; | ||
568 | |||
569 | return 0; | ||
570 | |||
571 | err_reg: | ||
572 | if (!net_eq(net, &init_net)) | ||
573 | kfree(ipvs->lblc_ctl_table); | ||
574 | |||
575 | err_dup: | ||
576 | return -ENOMEM; | ||
577 | } | ||
578 | |||
579 | static void __net_exit __ip_vs_lblc_exit(struct net *net) | ||
580 | { | ||
581 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
582 | |||
583 | unregister_net_sysctl_table(ipvs->lblc_ctl_header); | ||
584 | |||
585 | if (!net_eq(net, &init_net)) | ||
586 | kfree(ipvs->lblc_ctl_table); | ||
587 | } | ||
588 | |||
589 | static struct pernet_operations ip_vs_lblc_ops = { | ||
590 | .init = __ip_vs_lblc_init, | ||
591 | .exit = __ip_vs_lblc_exit, | ||
592 | }; | ||
546 | 593 | ||
547 | static int __init ip_vs_lblc_init(void) | 594 | static int __init ip_vs_lblc_init(void) |
548 | { | 595 | { |
549 | int ret; | 596 | int ret; |
550 | 597 | ||
551 | sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table); | 598 | ret = register_pernet_subsys(&ip_vs_lblc_ops); |
599 | if (ret) | ||
600 | return ret; | ||
601 | |||
552 | ret = register_ip_vs_scheduler(&ip_vs_lblc_scheduler); | 602 | ret = register_ip_vs_scheduler(&ip_vs_lblc_scheduler); |
553 | if (ret) | 603 | if (ret) |
554 | unregister_sysctl_table(sysctl_header); | 604 | unregister_pernet_subsys(&ip_vs_lblc_ops); |
555 | return ret; | 605 | return ret; |
556 | } | 606 | } |
557 | 607 | ||
558 | |||
559 | static void __exit ip_vs_lblc_cleanup(void) | 608 | static void __exit ip_vs_lblc_cleanup(void) |
560 | { | 609 | { |
561 | unregister_sysctl_table(sysctl_header); | ||
562 | unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler); | 610 | unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler); |
611 | unregister_pernet_subsys(&ip_vs_lblc_ops); | ||
563 | } | 612 | } |
564 | 613 | ||
565 | 614 | ||
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index dbeed8ea421a..61ae8cfcf0b4 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c | |||
@@ -70,8 +70,6 @@ | |||
70 | * entries that haven't been touched for a day. | 70 | * entries that haven't been touched for a day. |
71 | */ | 71 | */ |
72 | #define COUNT_FOR_FULL_EXPIRATION 30 | 72 | #define COUNT_FOR_FULL_EXPIRATION 30 |
73 | static int sysctl_ip_vs_lblcr_expiration = 24*60*60*HZ; | ||
74 | |||
75 | 73 | ||
76 | /* | 74 | /* |
77 | * for IPVS lblcr entry hash table | 75 | * for IPVS lblcr entry hash table |
@@ -296,7 +294,7 @@ struct ip_vs_lblcr_table { | |||
296 | static ctl_table vs_vars_table[] = { | 294 | static ctl_table vs_vars_table[] = { |
297 | { | 295 | { |
298 | .procname = "lblcr_expiration", | 296 | .procname = "lblcr_expiration", |
299 | .data = &sysctl_ip_vs_lblcr_expiration, | 297 | .data = NULL, |
300 | .maxlen = sizeof(int), | 298 | .maxlen = sizeof(int), |
301 | .mode = 0644, | 299 | .mode = 0644, |
302 | .proc_handler = proc_dointvec_jiffies, | 300 | .proc_handler = proc_dointvec_jiffies, |
@@ -304,8 +302,6 @@ static ctl_table vs_vars_table[] = { | |||
304 | { } | 302 | { } |
305 | }; | 303 | }; |
306 | 304 | ||
307 | static struct ctl_table_header * sysctl_header; | ||
308 | |||
309 | static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en) | 305 | static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en) |
310 | { | 306 | { |
311 | list_del(&en->list); | 307 | list_del(&en->list); |
@@ -425,14 +421,15 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc) | |||
425 | unsigned long now = jiffies; | 421 | unsigned long now = jiffies; |
426 | int i, j; | 422 | int i, j; |
427 | struct ip_vs_lblcr_entry *en, *nxt; | 423 | struct ip_vs_lblcr_entry *en, *nxt; |
424 | struct netns_ipvs *ipvs = net_ipvs(svc->net); | ||
428 | 425 | ||
429 | for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) { | 426 | for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) { |
430 | j = (j + 1) & IP_VS_LBLCR_TAB_MASK; | 427 | j = (j + 1) & IP_VS_LBLCR_TAB_MASK; |
431 | 428 | ||
432 | write_lock(&svc->sched_lock); | 429 | write_lock(&svc->sched_lock); |
433 | list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { | 430 | list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { |
434 | if (time_after(en->lastuse+sysctl_ip_vs_lblcr_expiration, | 431 | if (time_after(en->lastuse |
435 | now)) | 432 | + ipvs->sysctl_lblcr_expiration, now)) |
436 | continue; | 433 | continue; |
437 | 434 | ||
438 | ip_vs_lblcr_free(en); | 435 | ip_vs_lblcr_free(en); |
@@ -664,6 +661,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) | |||
664 | read_lock(&svc->sched_lock); | 661 | read_lock(&svc->sched_lock); |
665 | en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr); | 662 | en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr); |
666 | if (en) { | 663 | if (en) { |
664 | struct netns_ipvs *ipvs = net_ipvs(svc->net); | ||
667 | /* We only hold a read lock, but this is atomic */ | 665 | /* We only hold a read lock, but this is atomic */ |
668 | en->lastuse = jiffies; | 666 | en->lastuse = jiffies; |
669 | 667 | ||
@@ -675,7 +673,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) | |||
675 | /* More than one destination + enough time passed by, cleanup */ | 673 | /* More than one destination + enough time passed by, cleanup */ |
676 | if (atomic_read(&en->set.size) > 1 && | 674 | if (atomic_read(&en->set.size) > 1 && |
677 | time_after(jiffies, en->set.lastmod + | 675 | time_after(jiffies, en->set.lastmod + |
678 | sysctl_ip_vs_lblcr_expiration)) { | 676 | ipvs->sysctl_lblcr_expiration)) { |
679 | struct ip_vs_dest *m; | 677 | struct ip_vs_dest *m; |
680 | 678 | ||
681 | write_lock(&en->set.lock); | 679 | write_lock(&en->set.lock); |
@@ -744,23 +742,73 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler = | |||
744 | .schedule = ip_vs_lblcr_schedule, | 742 | .schedule = ip_vs_lblcr_schedule, |
745 | }; | 743 | }; |
746 | 744 | ||
745 | /* | ||
746 | * per netns init. | ||
747 | */ | ||
748 | static int __net_init __ip_vs_lblcr_init(struct net *net) | ||
749 | { | ||
750 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
751 | |||
752 | if (!net_eq(net, &init_net)) { | ||
753 | ipvs->lblcr_ctl_table = kmemdup(vs_vars_table, | ||
754 | sizeof(vs_vars_table), | ||
755 | GFP_KERNEL); | ||
756 | if (ipvs->lblcr_ctl_table == NULL) | ||
757 | goto err_dup; | ||
758 | } else | ||
759 | ipvs->lblcr_ctl_table = vs_vars_table; | ||
760 | ipvs->sysctl_lblcr_expiration = 24*60*60*HZ; | ||
761 | ipvs->lblcr_ctl_table[0].data = &ipvs->sysctl_lblcr_expiration; | ||
762 | |||
763 | ipvs->lblcr_ctl_header = | ||
764 | register_net_sysctl_table(net, net_vs_ctl_path, | ||
765 | ipvs->lblcr_ctl_table); | ||
766 | if (!ipvs->lblcr_ctl_header) | ||
767 | goto err_reg; | ||
768 | |||
769 | return 0; | ||
770 | |||
771 | err_reg: | ||
772 | if (!net_eq(net, &init_net)) | ||
773 | kfree(ipvs->lblcr_ctl_table); | ||
774 | |||
775 | err_dup: | ||
776 | return -ENOMEM; | ||
777 | } | ||
778 | |||
779 | static void __net_exit __ip_vs_lblcr_exit(struct net *net) | ||
780 | { | ||
781 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
782 | |||
783 | unregister_net_sysctl_table(ipvs->lblcr_ctl_header); | ||
784 | |||
785 | if (!net_eq(net, &init_net)) | ||
786 | kfree(ipvs->lblcr_ctl_table); | ||
787 | } | ||
788 | |||
789 | static struct pernet_operations ip_vs_lblcr_ops = { | ||
790 | .init = __ip_vs_lblcr_init, | ||
791 | .exit = __ip_vs_lblcr_exit, | ||
792 | }; | ||
747 | 793 | ||
748 | static int __init ip_vs_lblcr_init(void) | 794 | static int __init ip_vs_lblcr_init(void) |
749 | { | 795 | { |
750 | int ret; | 796 | int ret; |
751 | 797 | ||
752 | sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table); | 798 | ret = register_pernet_subsys(&ip_vs_lblcr_ops); |
799 | if (ret) | ||
800 | return ret; | ||
801 | |||
753 | ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler); | 802 | ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler); |
754 | if (ret) | 803 | if (ret) |
755 | unregister_sysctl_table(sysctl_header); | 804 | unregister_pernet_subsys(&ip_vs_lblcr_ops); |
756 | return ret; | 805 | return ret; |
757 | } | 806 | } |
758 | 807 | ||
759 | |||
760 | static void __exit ip_vs_lblcr_cleanup(void) | 808 | static void __exit ip_vs_lblcr_cleanup(void) |
761 | { | 809 | { |
762 | unregister_sysctl_table(sysctl_header); | ||
763 | unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler); | 810 | unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler); |
811 | unregister_pernet_subsys(&ip_vs_lblcr_ops); | ||
764 | } | 812 | } |
765 | 813 | ||
766 | 814 | ||
diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c index 4680647cd450..f454c80df0a7 100644 --- a/net/netfilter/ipvs/ip_vs_nfct.c +++ b/net/netfilter/ipvs/ip_vs_nfct.c | |||
@@ -141,6 +141,7 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct, | |||
141 | struct nf_conntrack_tuple *orig, new_reply; | 141 | struct nf_conntrack_tuple *orig, new_reply; |
142 | struct ip_vs_conn *cp; | 142 | struct ip_vs_conn *cp; |
143 | struct ip_vs_conn_param p; | 143 | struct ip_vs_conn_param p; |
144 | struct net *net = nf_ct_net(ct); | ||
144 | 145 | ||
145 | if (exp->tuple.src.l3num != PF_INET) | 146 | if (exp->tuple.src.l3num != PF_INET) |
146 | return; | 147 | return; |
@@ -155,7 +156,7 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct, | |||
155 | 156 | ||
156 | /* RS->CLIENT */ | 157 | /* RS->CLIENT */ |
157 | orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; | 158 | orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; |
158 | ip_vs_conn_fill_param(exp->tuple.src.l3num, orig->dst.protonum, | 159 | ip_vs_conn_fill_param(net, exp->tuple.src.l3num, orig->dst.protonum, |
159 | &orig->src.u3, orig->src.u.tcp.port, | 160 | &orig->src.u3, orig->src.u.tcp.port, |
160 | &orig->dst.u3, orig->dst.u.tcp.port, &p); | 161 | &orig->dst.u3, orig->dst.u.tcp.port, &p); |
161 | cp = ip_vs_conn_out_get(&p); | 162 | cp = ip_vs_conn_out_get(&p); |
@@ -268,7 +269,8 @@ void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp) | |||
268 | " for conn " FMT_CONN "\n", | 269 | " for conn " FMT_CONN "\n", |
269 | __func__, ARG_TUPLE(&tuple), ARG_CONN(cp)); | 270 | __func__, ARG_TUPLE(&tuple), ARG_CONN(cp)); |
270 | 271 | ||
271 | h = nf_conntrack_find_get(&init_net, NF_CT_DEFAULT_ZONE, &tuple); | 272 | h = nf_conntrack_find_get(ip_vs_conn_net(cp), NF_CT_DEFAULT_ZONE, |
273 | &tuple); | ||
272 | if (h) { | 274 | if (h) { |
273 | ct = nf_ct_tuplehash_to_ctrack(h); | 275 | ct = nf_ct_tuplehash_to_ctrack(h); |
274 | /* Show what happens instead of calling nf_ct_kill() */ | 276 | /* Show what happens instead of calling nf_ct_kill() */ |
diff --git a/net/netfilter/ipvs/ip_vs_pe.c b/net/netfilter/ipvs/ip_vs_pe.c index 3414af70ee12..5cf859ccb31b 100644 --- a/net/netfilter/ipvs/ip_vs_pe.c +++ b/net/netfilter/ipvs/ip_vs_pe.c | |||
@@ -29,12 +29,11 @@ void ip_vs_unbind_pe(struct ip_vs_service *svc) | |||
29 | } | 29 | } |
30 | 30 | ||
31 | /* Get pe in the pe list by name */ | 31 | /* Get pe in the pe list by name */ |
32 | static struct ip_vs_pe * | 32 | struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name) |
33 | ip_vs_pe_getbyname(const char *pe_name) | ||
34 | { | 33 | { |
35 | struct ip_vs_pe *pe; | 34 | struct ip_vs_pe *pe; |
36 | 35 | ||
37 | IP_VS_DBG(2, "%s(): pe_name \"%s\"\n", __func__, | 36 | IP_VS_DBG(10, "%s(): pe_name \"%s\"\n", __func__, |
38 | pe_name); | 37 | pe_name); |
39 | 38 | ||
40 | spin_lock_bh(&ip_vs_pe_lock); | 39 | spin_lock_bh(&ip_vs_pe_lock); |
@@ -60,28 +59,22 @@ ip_vs_pe_getbyname(const char *pe_name) | |||
60 | } | 59 | } |
61 | 60 | ||
62 | /* Lookup pe and try to load it if it doesn't exist */ | 61 | /* Lookup pe and try to load it if it doesn't exist */ |
63 | struct ip_vs_pe *ip_vs_pe_get(const char *name) | 62 | struct ip_vs_pe *ip_vs_pe_getbyname(const char *name) |
64 | { | 63 | { |
65 | struct ip_vs_pe *pe; | 64 | struct ip_vs_pe *pe; |
66 | 65 | ||
67 | /* Search for the pe by name */ | 66 | /* Search for the pe by name */ |
68 | pe = ip_vs_pe_getbyname(name); | 67 | pe = __ip_vs_pe_getbyname(name); |
69 | 68 | ||
70 | /* If pe not found, load the module and search again */ | 69 | /* If pe not found, load the module and search again */ |
71 | if (!pe) { | 70 | if (!pe) { |
72 | request_module("ip_vs_pe_%s", name); | 71 | request_module("ip_vs_pe_%s", name); |
73 | pe = ip_vs_pe_getbyname(name); | 72 | pe = __ip_vs_pe_getbyname(name); |
74 | } | 73 | } |
75 | 74 | ||
76 | return pe; | 75 | return pe; |
77 | } | 76 | } |
78 | 77 | ||
79 | void ip_vs_pe_put(struct ip_vs_pe *pe) | ||
80 | { | ||
81 | if (pe && pe->module) | ||
82 | module_put(pe->module); | ||
83 | } | ||
84 | |||
85 | /* Register a pe in the pe list */ | 78 | /* Register a pe in the pe list */ |
86 | int register_ip_vs_pe(struct ip_vs_pe *pe) | 79 | int register_ip_vs_pe(struct ip_vs_pe *pe) |
87 | { | 80 | { |
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c index b8b4e9620f3e..0d83bc01fed4 100644 --- a/net/netfilter/ipvs/ip_vs_pe_sip.c +++ b/net/netfilter/ipvs/ip_vs_pe_sip.c | |||
@@ -71,6 +71,7 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb) | |||
71 | struct ip_vs_iphdr iph; | 71 | struct ip_vs_iphdr iph; |
72 | unsigned int dataoff, datalen, matchoff, matchlen; | 72 | unsigned int dataoff, datalen, matchoff, matchlen; |
73 | const char *dptr; | 73 | const char *dptr; |
74 | int retc; | ||
74 | 75 | ||
75 | ip_vs_fill_iphdr(p->af, skb_network_header(skb), &iph); | 76 | ip_vs_fill_iphdr(p->af, skb_network_header(skb), &iph); |
76 | 77 | ||
@@ -83,6 +84,8 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb) | |||
83 | if (dataoff >= skb->len) | 84 | if (dataoff >= skb->len) |
84 | return -EINVAL; | 85 | return -EINVAL; |
85 | 86 | ||
87 | if ((retc=skb_linearize(skb)) < 0) | ||
88 | return retc; | ||
86 | dptr = skb->data + dataoff; | 89 | dptr = skb->data + dataoff; |
87 | datalen = skb->len - dataoff; | 90 | datalen = skb->len - dataoff; |
88 | 91 | ||
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index c53998390877..6ac986cdcff3 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c | |||
@@ -60,6 +60,31 @@ static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp) | |||
60 | return 0; | 60 | return 0; |
61 | } | 61 | } |
62 | 62 | ||
63 | /* | ||
64 | * register an ipvs protocols netns related data | ||
65 | */ | ||
66 | static int | ||
67 | register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp) | ||
68 | { | ||
69 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
70 | unsigned hash = IP_VS_PROTO_HASH(pp->protocol); | ||
71 | struct ip_vs_proto_data *pd = | ||
72 | kzalloc(sizeof(struct ip_vs_proto_data), GFP_ATOMIC); | ||
73 | |||
74 | if (!pd) { | ||
75 | pr_err("%s(): no memory.\n", __func__); | ||
76 | return -ENOMEM; | ||
77 | } | ||
78 | pd->pp = pp; /* For speed issues */ | ||
79 | pd->next = ipvs->proto_data_table[hash]; | ||
80 | ipvs->proto_data_table[hash] = pd; | ||
81 | atomic_set(&pd->appcnt, 0); /* Init app counter */ | ||
82 | |||
83 | if (pp->init_netns != NULL) | ||
84 | pp->init_netns(net, pd); | ||
85 | |||
86 | return 0; | ||
87 | } | ||
63 | 88 | ||
64 | /* | 89 | /* |
65 | * unregister an ipvs protocol | 90 | * unregister an ipvs protocol |
@@ -82,6 +107,29 @@ static int unregister_ip_vs_protocol(struct ip_vs_protocol *pp) | |||
82 | return -ESRCH; | 107 | return -ESRCH; |
83 | } | 108 | } |
84 | 109 | ||
110 | /* | ||
111 | * unregister an ipvs protocols netns data | ||
112 | */ | ||
113 | static int | ||
114 | unregister_ip_vs_proto_netns(struct net *net, struct ip_vs_proto_data *pd) | ||
115 | { | ||
116 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
117 | struct ip_vs_proto_data **pd_p; | ||
118 | unsigned hash = IP_VS_PROTO_HASH(pd->pp->protocol); | ||
119 | |||
120 | pd_p = &ipvs->proto_data_table[hash]; | ||
121 | for (; *pd_p; pd_p = &(*pd_p)->next) { | ||
122 | if (*pd_p == pd) { | ||
123 | *pd_p = pd->next; | ||
124 | if (pd->pp->exit_netns != NULL) | ||
125 | pd->pp->exit_netns(net, pd); | ||
126 | kfree(pd); | ||
127 | return 0; | ||
128 | } | ||
129 | } | ||
130 | |||
131 | return -ESRCH; | ||
132 | } | ||
85 | 133 | ||
86 | /* | 134 | /* |
87 | * get ip_vs_protocol object by its proto. | 135 | * get ip_vs_protocol object by its proto. |
@@ -100,19 +148,44 @@ struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto) | |||
100 | } | 148 | } |
101 | EXPORT_SYMBOL(ip_vs_proto_get); | 149 | EXPORT_SYMBOL(ip_vs_proto_get); |
102 | 150 | ||
151 | /* | ||
152 | * get ip_vs_protocol object data by netns and proto | ||
153 | */ | ||
154 | struct ip_vs_proto_data * | ||
155 | __ipvs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto) | ||
156 | { | ||
157 | struct ip_vs_proto_data *pd; | ||
158 | unsigned hash = IP_VS_PROTO_HASH(proto); | ||
159 | |||
160 | for (pd = ipvs->proto_data_table[hash]; pd; pd = pd->next) { | ||
161 | if (pd->pp->protocol == proto) | ||
162 | return pd; | ||
163 | } | ||
164 | |||
165 | return NULL; | ||
166 | } | ||
167 | |||
168 | struct ip_vs_proto_data * | ||
169 | ip_vs_proto_data_get(struct net *net, unsigned short proto) | ||
170 | { | ||
171 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
172 | |||
173 | return __ipvs_proto_data_get(ipvs, proto); | ||
174 | } | ||
175 | EXPORT_SYMBOL(ip_vs_proto_data_get); | ||
103 | 176 | ||
104 | /* | 177 | /* |
105 | * Propagate event for state change to all protocols | 178 | * Propagate event for state change to all protocols |
106 | */ | 179 | */ |
107 | void ip_vs_protocol_timeout_change(int flags) | 180 | void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags) |
108 | { | 181 | { |
109 | struct ip_vs_protocol *pp; | 182 | struct ip_vs_proto_data *pd; |
110 | int i; | 183 | int i; |
111 | 184 | ||
112 | for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { | 185 | for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { |
113 | for (pp = ip_vs_proto_table[i]; pp; pp = pp->next) { | 186 | for (pd = ipvs->proto_data_table[i]; pd; pd = pd->next) { |
114 | if (pp->timeout_change) | 187 | if (pd->pp->timeout_change) |
115 | pp->timeout_change(pp, flags); | 188 | pd->pp->timeout_change(pd, flags); |
116 | } | 189 | } |
117 | } | 190 | } |
118 | } | 191 | } |
@@ -236,6 +309,46 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp, | |||
236 | ip_vs_tcpudp_debug_packet_v4(pp, skb, offset, msg); | 309 | ip_vs_tcpudp_debug_packet_v4(pp, skb, offset, msg); |
237 | } | 310 | } |
238 | 311 | ||
312 | /* | ||
313 | * per network name-space init | ||
314 | */ | ||
315 | static int __net_init __ip_vs_protocol_init(struct net *net) | ||
316 | { | ||
317 | #ifdef CONFIG_IP_VS_PROTO_TCP | ||
318 | register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp); | ||
319 | #endif | ||
320 | #ifdef CONFIG_IP_VS_PROTO_UDP | ||
321 | register_ip_vs_proto_netns(net, &ip_vs_protocol_udp); | ||
322 | #endif | ||
323 | #ifdef CONFIG_IP_VS_PROTO_SCTP | ||
324 | register_ip_vs_proto_netns(net, &ip_vs_protocol_sctp); | ||
325 | #endif | ||
326 | #ifdef CONFIG_IP_VS_PROTO_AH | ||
327 | register_ip_vs_proto_netns(net, &ip_vs_protocol_ah); | ||
328 | #endif | ||
329 | #ifdef CONFIG_IP_VS_PROTO_ESP | ||
330 | register_ip_vs_proto_netns(net, &ip_vs_protocol_esp); | ||
331 | #endif | ||
332 | return 0; | ||
333 | } | ||
334 | |||
335 | static void __net_exit __ip_vs_protocol_cleanup(struct net *net) | ||
336 | { | ||
337 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
338 | struct ip_vs_proto_data *pd; | ||
339 | int i; | ||
340 | |||
341 | /* unregister all the ipvs proto data for this netns */ | ||
342 | for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { | ||
343 | while ((pd = ipvs->proto_data_table[i]) != NULL) | ||
344 | unregister_ip_vs_proto_netns(net, pd); | ||
345 | } | ||
346 | } | ||
347 | |||
348 | static struct pernet_operations ipvs_proto_ops = { | ||
349 | .init = __ip_vs_protocol_init, | ||
350 | .exit = __ip_vs_protocol_cleanup, | ||
351 | }; | ||
239 | 352 | ||
240 | int __init ip_vs_protocol_init(void) | 353 | int __init ip_vs_protocol_init(void) |
241 | { | 354 | { |
@@ -265,6 +378,7 @@ int __init ip_vs_protocol_init(void) | |||
265 | REGISTER_PROTOCOL(&ip_vs_protocol_esp); | 378 | REGISTER_PROTOCOL(&ip_vs_protocol_esp); |
266 | #endif | 379 | #endif |
267 | pr_info("Registered protocols (%s)\n", &protocols[2]); | 380 | pr_info("Registered protocols (%s)\n", &protocols[2]); |
381 | return register_pernet_subsys(&ipvs_proto_ops); | ||
268 | 382 | ||
269 | return 0; | 383 | return 0; |
270 | } | 384 | } |
@@ -275,6 +389,7 @@ void ip_vs_protocol_cleanup(void) | |||
275 | struct ip_vs_protocol *pp; | 389 | struct ip_vs_protocol *pp; |
276 | int i; | 390 | int i; |
277 | 391 | ||
392 | unregister_pernet_subsys(&ipvs_proto_ops); | ||
278 | /* unregister all the ipvs protocols */ | 393 | /* unregister all the ipvs protocols */ |
279 | for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { | 394 | for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { |
280 | while ((pp = ip_vs_proto_table[i]) != NULL) | 395 | while ((pp = ip_vs_proto_table[i]) != NULL) |
diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c index 3a0461117d3f..5b8eb8b12c3e 100644 --- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c +++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c | |||
@@ -41,28 +41,30 @@ struct isakmp_hdr { | |||
41 | #define PORT_ISAKMP 500 | 41 | #define PORT_ISAKMP 500 |
42 | 42 | ||
43 | static void | 43 | static void |
44 | ah_esp_conn_fill_param_proto(int af, const struct ip_vs_iphdr *iph, | 44 | ah_esp_conn_fill_param_proto(struct net *net, int af, |
45 | int inverse, struct ip_vs_conn_param *p) | 45 | const struct ip_vs_iphdr *iph, int inverse, |
46 | struct ip_vs_conn_param *p) | ||
46 | { | 47 | { |
47 | if (likely(!inverse)) | 48 | if (likely(!inverse)) |
48 | ip_vs_conn_fill_param(af, IPPROTO_UDP, | 49 | ip_vs_conn_fill_param(net, af, IPPROTO_UDP, |
49 | &iph->saddr, htons(PORT_ISAKMP), | 50 | &iph->saddr, htons(PORT_ISAKMP), |
50 | &iph->daddr, htons(PORT_ISAKMP), p); | 51 | &iph->daddr, htons(PORT_ISAKMP), p); |
51 | else | 52 | else |
52 | ip_vs_conn_fill_param(af, IPPROTO_UDP, | 53 | ip_vs_conn_fill_param(net, af, IPPROTO_UDP, |
53 | &iph->daddr, htons(PORT_ISAKMP), | 54 | &iph->daddr, htons(PORT_ISAKMP), |
54 | &iph->saddr, htons(PORT_ISAKMP), p); | 55 | &iph->saddr, htons(PORT_ISAKMP), p); |
55 | } | 56 | } |
56 | 57 | ||
57 | static struct ip_vs_conn * | 58 | static struct ip_vs_conn * |
58 | ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, | 59 | ah_esp_conn_in_get(int af, const struct sk_buff *skb, |
59 | const struct ip_vs_iphdr *iph, unsigned int proto_off, | 60 | const struct ip_vs_iphdr *iph, unsigned int proto_off, |
60 | int inverse) | 61 | int inverse) |
61 | { | 62 | { |
62 | struct ip_vs_conn *cp; | 63 | struct ip_vs_conn *cp; |
63 | struct ip_vs_conn_param p; | 64 | struct ip_vs_conn_param p; |
65 | struct net *net = skb_net(skb); | ||
64 | 66 | ||
65 | ah_esp_conn_fill_param_proto(af, iph, inverse, &p); | 67 | ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p); |
66 | cp = ip_vs_conn_in_get(&p); | 68 | cp = ip_vs_conn_in_get(&p); |
67 | if (!cp) { | 69 | if (!cp) { |
68 | /* | 70 | /* |
@@ -72,7 +74,7 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
72 | IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet " | 74 | IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet " |
73 | "%s%s %s->%s\n", | 75 | "%s%s %s->%s\n", |
74 | inverse ? "ICMP+" : "", | 76 | inverse ? "ICMP+" : "", |
75 | pp->name, | 77 | ip_vs_proto_get(iph->protocol)->name, |
76 | IP_VS_DBG_ADDR(af, &iph->saddr), | 78 | IP_VS_DBG_ADDR(af, &iph->saddr), |
77 | IP_VS_DBG_ADDR(af, &iph->daddr)); | 79 | IP_VS_DBG_ADDR(af, &iph->daddr)); |
78 | } | 80 | } |
@@ -83,21 +85,21 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
83 | 85 | ||
84 | static struct ip_vs_conn * | 86 | static struct ip_vs_conn * |
85 | ah_esp_conn_out_get(int af, const struct sk_buff *skb, | 87 | ah_esp_conn_out_get(int af, const struct sk_buff *skb, |
86 | struct ip_vs_protocol *pp, | ||
87 | const struct ip_vs_iphdr *iph, | 88 | const struct ip_vs_iphdr *iph, |
88 | unsigned int proto_off, | 89 | unsigned int proto_off, |
89 | int inverse) | 90 | int inverse) |
90 | { | 91 | { |
91 | struct ip_vs_conn *cp; | 92 | struct ip_vs_conn *cp; |
92 | struct ip_vs_conn_param p; | 93 | struct ip_vs_conn_param p; |
94 | struct net *net = skb_net(skb); | ||
93 | 95 | ||
94 | ah_esp_conn_fill_param_proto(af, iph, inverse, &p); | 96 | ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p); |
95 | cp = ip_vs_conn_out_get(&p); | 97 | cp = ip_vs_conn_out_get(&p); |
96 | if (!cp) { | 98 | if (!cp) { |
97 | IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet " | 99 | IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet " |
98 | "%s%s %s->%s\n", | 100 | "%s%s %s->%s\n", |
99 | inverse ? "ICMP+" : "", | 101 | inverse ? "ICMP+" : "", |
100 | pp->name, | 102 | ip_vs_proto_get(iph->protocol)->name, |
101 | IP_VS_DBG_ADDR(af, &iph->saddr), | 103 | IP_VS_DBG_ADDR(af, &iph->saddr), |
102 | IP_VS_DBG_ADDR(af, &iph->daddr)); | 104 | IP_VS_DBG_ADDR(af, &iph->daddr)); |
103 | } | 105 | } |
@@ -107,7 +109,7 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb, | |||
107 | 109 | ||
108 | 110 | ||
109 | static int | 111 | static int |
110 | ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, | 112 | ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, |
111 | int *verdict, struct ip_vs_conn **cpp) | 113 | int *verdict, struct ip_vs_conn **cpp) |
112 | { | 114 | { |
113 | /* | 115 | /* |
@@ -117,26 +119,14 @@ ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
117 | return 0; | 119 | return 0; |
118 | } | 120 | } |
119 | 121 | ||
120 | static void ah_esp_init(struct ip_vs_protocol *pp) | ||
121 | { | ||
122 | /* nothing to do now */ | ||
123 | } | ||
124 | |||
125 | |||
126 | static void ah_esp_exit(struct ip_vs_protocol *pp) | ||
127 | { | ||
128 | /* nothing to do now */ | ||
129 | } | ||
130 | |||
131 | |||
132 | #ifdef CONFIG_IP_VS_PROTO_AH | 122 | #ifdef CONFIG_IP_VS_PROTO_AH |
133 | struct ip_vs_protocol ip_vs_protocol_ah = { | 123 | struct ip_vs_protocol ip_vs_protocol_ah = { |
134 | .name = "AH", | 124 | .name = "AH", |
135 | .protocol = IPPROTO_AH, | 125 | .protocol = IPPROTO_AH, |
136 | .num_states = 1, | 126 | .num_states = 1, |
137 | .dont_defrag = 1, | 127 | .dont_defrag = 1, |
138 | .init = ah_esp_init, | 128 | .init = NULL, |
139 | .exit = ah_esp_exit, | 129 | .exit = NULL, |
140 | .conn_schedule = ah_esp_conn_schedule, | 130 | .conn_schedule = ah_esp_conn_schedule, |
141 | .conn_in_get = ah_esp_conn_in_get, | 131 | .conn_in_get = ah_esp_conn_in_get, |
142 | .conn_out_get = ah_esp_conn_out_get, | 132 | .conn_out_get = ah_esp_conn_out_get, |
@@ -149,7 +139,6 @@ struct ip_vs_protocol ip_vs_protocol_ah = { | |||
149 | .app_conn_bind = NULL, | 139 | .app_conn_bind = NULL, |
150 | .debug_packet = ip_vs_tcpudp_debug_packet, | 140 | .debug_packet = ip_vs_tcpudp_debug_packet, |
151 | .timeout_change = NULL, /* ISAKMP */ | 141 | .timeout_change = NULL, /* ISAKMP */ |
152 | .set_state_timeout = NULL, | ||
153 | }; | 142 | }; |
154 | #endif | 143 | #endif |
155 | 144 | ||
@@ -159,8 +148,8 @@ struct ip_vs_protocol ip_vs_protocol_esp = { | |||
159 | .protocol = IPPROTO_ESP, | 148 | .protocol = IPPROTO_ESP, |
160 | .num_states = 1, | 149 | .num_states = 1, |
161 | .dont_defrag = 1, | 150 | .dont_defrag = 1, |
162 | .init = ah_esp_init, | 151 | .init = NULL, |
163 | .exit = ah_esp_exit, | 152 | .exit = NULL, |
164 | .conn_schedule = ah_esp_conn_schedule, | 153 | .conn_schedule = ah_esp_conn_schedule, |
165 | .conn_in_get = ah_esp_conn_in_get, | 154 | .conn_in_get = ah_esp_conn_in_get, |
166 | .conn_out_get = ah_esp_conn_out_get, | 155 | .conn_out_get = ah_esp_conn_out_get, |
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 1ea96bcd342b..fb2d04ac5d4e 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c | |||
@@ -9,9 +9,10 @@ | |||
9 | #include <net/ip_vs.h> | 9 | #include <net/ip_vs.h> |
10 | 10 | ||
11 | static int | 11 | static int |
12 | sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, | 12 | sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, |
13 | int *verdict, struct ip_vs_conn **cpp) | 13 | int *verdict, struct ip_vs_conn **cpp) |
14 | { | 14 | { |
15 | struct net *net; | ||
15 | struct ip_vs_service *svc; | 16 | struct ip_vs_service *svc; |
16 | sctp_chunkhdr_t _schunkh, *sch; | 17 | sctp_chunkhdr_t _schunkh, *sch; |
17 | sctp_sctphdr_t *sh, _sctph; | 18 | sctp_sctphdr_t *sh, _sctph; |
@@ -27,13 +28,13 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
27 | sizeof(_schunkh), &_schunkh); | 28 | sizeof(_schunkh), &_schunkh); |
28 | if (sch == NULL) | 29 | if (sch == NULL) |
29 | return 0; | 30 | return 0; |
30 | 31 | net = skb_net(skb); | |
31 | if ((sch->type == SCTP_CID_INIT) && | 32 | if ((sch->type == SCTP_CID_INIT) && |
32 | (svc = ip_vs_service_get(af, skb->mark, iph.protocol, | 33 | (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol, |
33 | &iph.daddr, sh->dest))) { | 34 | &iph.daddr, sh->dest))) { |
34 | int ignored; | 35 | int ignored; |
35 | 36 | ||
36 | if (ip_vs_todrop()) { | 37 | if (ip_vs_todrop(net_ipvs(net))) { |
37 | /* | 38 | /* |
38 | * It seems that we are very loaded. | 39 | * It seems that we are very loaded. |
39 | * We have to drop this packet :( | 40 | * We have to drop this packet :( |
@@ -46,14 +47,19 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
46 | * Let the virtual server select a real server for the | 47 | * Let the virtual server select a real server for the |
47 | * incoming connection, and create a connection entry. | 48 | * incoming connection, and create a connection entry. |
48 | */ | 49 | */ |
49 | *cpp = ip_vs_schedule(svc, skb, pp, &ignored); | 50 | *cpp = ip_vs_schedule(svc, skb, pd, &ignored); |
50 | if (!*cpp && !ignored) { | 51 | if (!*cpp && ignored <= 0) { |
51 | *verdict = ip_vs_leave(svc, skb, pp); | 52 | if (!ignored) |
53 | *verdict = ip_vs_leave(svc, skb, pd); | ||
54 | else { | ||
55 | ip_vs_service_put(svc); | ||
56 | *verdict = NF_DROP; | ||
57 | } | ||
52 | return 0; | 58 | return 0; |
53 | } | 59 | } |
54 | ip_vs_service_put(svc); | 60 | ip_vs_service_put(svc); |
55 | } | 61 | } |
56 | 62 | /* NF_ACCEPT */ | |
57 | return 1; | 63 | return 1; |
58 | } | 64 | } |
59 | 65 | ||
@@ -856,7 +862,7 @@ static struct ipvs_sctp_nextstate | |||
856 | /* | 862 | /* |
857 | * Timeout table[state] | 863 | * Timeout table[state] |
858 | */ | 864 | */ |
859 | static int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = { | 865 | static const int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = { |
860 | [IP_VS_SCTP_S_NONE] = 2 * HZ, | 866 | [IP_VS_SCTP_S_NONE] = 2 * HZ, |
861 | [IP_VS_SCTP_S_INIT_CLI] = 1 * 60 * HZ, | 867 | [IP_VS_SCTP_S_INIT_CLI] = 1 * 60 * HZ, |
862 | [IP_VS_SCTP_S_INIT_SER] = 1 * 60 * HZ, | 868 | [IP_VS_SCTP_S_INIT_SER] = 1 * 60 * HZ, |
@@ -900,20 +906,8 @@ static const char *sctp_state_name(int state) | |||
900 | return "?"; | 906 | return "?"; |
901 | } | 907 | } |
902 | 908 | ||
903 | static void sctp_timeout_change(struct ip_vs_protocol *pp, int flags) | ||
904 | { | ||
905 | } | ||
906 | |||
907 | static int | ||
908 | sctp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to) | ||
909 | { | ||
910 | |||
911 | return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_SCTP_S_LAST, | ||
912 | sctp_state_name_table, sname, to); | ||
913 | } | ||
914 | |||
915 | static inline int | 909 | static inline int |
916 | set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, | 910 | set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, |
917 | int direction, const struct sk_buff *skb) | 911 | int direction, const struct sk_buff *skb) |
918 | { | 912 | { |
919 | sctp_chunkhdr_t _sctpch, *sch; | 913 | sctp_chunkhdr_t _sctpch, *sch; |
@@ -971,7 +965,7 @@ set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, | |||
971 | 965 | ||
972 | IP_VS_DBG_BUF(8, "%s %s %s:%d->" | 966 | IP_VS_DBG_BUF(8, "%s %s %s:%d->" |
973 | "%s:%d state: %s->%s conn->refcnt:%d\n", | 967 | "%s:%d state: %s->%s conn->refcnt:%d\n", |
974 | pp->name, | 968 | pd->pp->name, |
975 | ((direction == IP_VS_DIR_OUTPUT) ? | 969 | ((direction == IP_VS_DIR_OUTPUT) ? |
976 | "output " : "input "), | 970 | "output " : "input "), |
977 | IP_VS_DBG_ADDR(cp->af, &cp->daddr), | 971 | IP_VS_DBG_ADDR(cp->af, &cp->daddr), |
@@ -995,75 +989,73 @@ set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, | |||
995 | } | 989 | } |
996 | } | 990 | } |
997 | } | 991 | } |
992 | if (likely(pd)) | ||
993 | cp->timeout = pd->timeout_table[cp->state = next_state]; | ||
994 | else /* What to do ? */ | ||
995 | cp->timeout = sctp_timeouts[cp->state = next_state]; | ||
998 | 996 | ||
999 | cp->timeout = pp->timeout_table[cp->state = next_state]; | 997 | return 1; |
1000 | |||
1001 | return 1; | ||
1002 | } | 998 | } |
1003 | 999 | ||
1004 | static int | 1000 | static int |
1005 | sctp_state_transition(struct ip_vs_conn *cp, int direction, | 1001 | sctp_state_transition(struct ip_vs_conn *cp, int direction, |
1006 | const struct sk_buff *skb, struct ip_vs_protocol *pp) | 1002 | const struct sk_buff *skb, struct ip_vs_proto_data *pd) |
1007 | { | 1003 | { |
1008 | int ret = 0; | 1004 | int ret = 0; |
1009 | 1005 | ||
1010 | spin_lock(&cp->lock); | 1006 | spin_lock(&cp->lock); |
1011 | ret = set_sctp_state(pp, cp, direction, skb); | 1007 | ret = set_sctp_state(pd, cp, direction, skb); |
1012 | spin_unlock(&cp->lock); | 1008 | spin_unlock(&cp->lock); |
1013 | 1009 | ||
1014 | return ret; | 1010 | return ret; |
1015 | } | 1011 | } |
1016 | 1012 | ||
1017 | /* | ||
1018 | * Hash table for SCTP application incarnations | ||
1019 | */ | ||
1020 | #define SCTP_APP_TAB_BITS 4 | ||
1021 | #define SCTP_APP_TAB_SIZE (1 << SCTP_APP_TAB_BITS) | ||
1022 | #define SCTP_APP_TAB_MASK (SCTP_APP_TAB_SIZE - 1) | ||
1023 | |||
1024 | static struct list_head sctp_apps[SCTP_APP_TAB_SIZE]; | ||
1025 | static DEFINE_SPINLOCK(sctp_app_lock); | ||
1026 | |||
1027 | static inline __u16 sctp_app_hashkey(__be16 port) | 1013 | static inline __u16 sctp_app_hashkey(__be16 port) |
1028 | { | 1014 | { |
1029 | return (((__force u16)port >> SCTP_APP_TAB_BITS) ^ (__force u16)port) | 1015 | return (((__force u16)port >> SCTP_APP_TAB_BITS) ^ (__force u16)port) |
1030 | & SCTP_APP_TAB_MASK; | 1016 | & SCTP_APP_TAB_MASK; |
1031 | } | 1017 | } |
1032 | 1018 | ||
1033 | static int sctp_register_app(struct ip_vs_app *inc) | 1019 | static int sctp_register_app(struct net *net, struct ip_vs_app *inc) |
1034 | { | 1020 | { |
1035 | struct ip_vs_app *i; | 1021 | struct ip_vs_app *i; |
1036 | __u16 hash; | 1022 | __u16 hash; |
1037 | __be16 port = inc->port; | 1023 | __be16 port = inc->port; |
1038 | int ret = 0; | 1024 | int ret = 0; |
1025 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
1026 | struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP); | ||
1039 | 1027 | ||
1040 | hash = sctp_app_hashkey(port); | 1028 | hash = sctp_app_hashkey(port); |
1041 | 1029 | ||
1042 | spin_lock_bh(&sctp_app_lock); | 1030 | spin_lock_bh(&ipvs->sctp_app_lock); |
1043 | list_for_each_entry(i, &sctp_apps[hash], p_list) { | 1031 | list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) { |
1044 | if (i->port == port) { | 1032 | if (i->port == port) { |
1045 | ret = -EEXIST; | 1033 | ret = -EEXIST; |
1046 | goto out; | 1034 | goto out; |
1047 | } | 1035 | } |
1048 | } | 1036 | } |
1049 | list_add(&inc->p_list, &sctp_apps[hash]); | 1037 | list_add(&inc->p_list, &ipvs->sctp_apps[hash]); |
1050 | atomic_inc(&ip_vs_protocol_sctp.appcnt); | 1038 | atomic_inc(&pd->appcnt); |
1051 | out: | 1039 | out: |
1052 | spin_unlock_bh(&sctp_app_lock); | 1040 | spin_unlock_bh(&ipvs->sctp_app_lock); |
1053 | 1041 | ||
1054 | return ret; | 1042 | return ret; |
1055 | } | 1043 | } |
1056 | 1044 | ||
1057 | static void sctp_unregister_app(struct ip_vs_app *inc) | 1045 | static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc) |
1058 | { | 1046 | { |
1059 | spin_lock_bh(&sctp_app_lock); | 1047 | struct netns_ipvs *ipvs = net_ipvs(net); |
1060 | atomic_dec(&ip_vs_protocol_sctp.appcnt); | 1048 | struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP); |
1049 | |||
1050 | spin_lock_bh(&ipvs->sctp_app_lock); | ||
1051 | atomic_dec(&pd->appcnt); | ||
1061 | list_del(&inc->p_list); | 1052 | list_del(&inc->p_list); |
1062 | spin_unlock_bh(&sctp_app_lock); | 1053 | spin_unlock_bh(&ipvs->sctp_app_lock); |
1063 | } | 1054 | } |
1064 | 1055 | ||
1065 | static int sctp_app_conn_bind(struct ip_vs_conn *cp) | 1056 | static int sctp_app_conn_bind(struct ip_vs_conn *cp) |
1066 | { | 1057 | { |
1058 | struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); | ||
1067 | int hash; | 1059 | int hash; |
1068 | struct ip_vs_app *inc; | 1060 | struct ip_vs_app *inc; |
1069 | int result = 0; | 1061 | int result = 0; |
@@ -1074,12 +1066,12 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp) | |||
1074 | /* Lookup application incarnations and bind the right one */ | 1066 | /* Lookup application incarnations and bind the right one */ |
1075 | hash = sctp_app_hashkey(cp->vport); | 1067 | hash = sctp_app_hashkey(cp->vport); |
1076 | 1068 | ||
1077 | spin_lock(&sctp_app_lock); | 1069 | spin_lock(&ipvs->sctp_app_lock); |
1078 | list_for_each_entry(inc, &sctp_apps[hash], p_list) { | 1070 | list_for_each_entry(inc, &ipvs->sctp_apps[hash], p_list) { |
1079 | if (inc->port == cp->vport) { | 1071 | if (inc->port == cp->vport) { |
1080 | if (unlikely(!ip_vs_app_inc_get(inc))) | 1072 | if (unlikely(!ip_vs_app_inc_get(inc))) |
1081 | break; | 1073 | break; |
1082 | spin_unlock(&sctp_app_lock); | 1074 | spin_unlock(&ipvs->sctp_app_lock); |
1083 | 1075 | ||
1084 | IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->" | 1076 | IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->" |
1085 | "%s:%u to app %s on port %u\n", | 1077 | "%s:%u to app %s on port %u\n", |
@@ -1095,43 +1087,50 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp) | |||
1095 | goto out; | 1087 | goto out; |
1096 | } | 1088 | } |
1097 | } | 1089 | } |
1098 | spin_unlock(&sctp_app_lock); | 1090 | spin_unlock(&ipvs->sctp_app_lock); |
1099 | out: | 1091 | out: |
1100 | return result; | 1092 | return result; |
1101 | } | 1093 | } |
1102 | 1094 | ||
1103 | static void ip_vs_sctp_init(struct ip_vs_protocol *pp) | 1095 | /* --------------------------------------------- |
1096 | * timeouts is netns related now. | ||
1097 | * --------------------------------------------- | ||
1098 | */ | ||
1099 | static void __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd) | ||
1104 | { | 1100 | { |
1105 | IP_VS_INIT_HASH_TABLE(sctp_apps); | 1101 | struct netns_ipvs *ipvs = net_ipvs(net); |
1106 | pp->timeout_table = sctp_timeouts; | ||
1107 | } | ||
1108 | 1102 | ||
1103 | ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE); | ||
1104 | spin_lock_init(&ipvs->tcp_app_lock); | ||
1105 | pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts, | ||
1106 | sizeof(sctp_timeouts)); | ||
1107 | } | ||
1109 | 1108 | ||
1110 | static void ip_vs_sctp_exit(struct ip_vs_protocol *pp) | 1109 | static void __ip_vs_sctp_exit(struct net *net, struct ip_vs_proto_data *pd) |
1111 | { | 1110 | { |
1112 | 1111 | kfree(pd->timeout_table); | |
1113 | } | 1112 | } |
1114 | 1113 | ||
1115 | struct ip_vs_protocol ip_vs_protocol_sctp = { | 1114 | struct ip_vs_protocol ip_vs_protocol_sctp = { |
1116 | .name = "SCTP", | 1115 | .name = "SCTP", |
1117 | .protocol = IPPROTO_SCTP, | 1116 | .protocol = IPPROTO_SCTP, |
1118 | .num_states = IP_VS_SCTP_S_LAST, | 1117 | .num_states = IP_VS_SCTP_S_LAST, |
1119 | .dont_defrag = 0, | 1118 | .dont_defrag = 0, |
1120 | .appcnt = ATOMIC_INIT(0), | 1119 | .init = NULL, |
1121 | .init = ip_vs_sctp_init, | 1120 | .exit = NULL, |
1122 | .exit = ip_vs_sctp_exit, | 1121 | .init_netns = __ip_vs_sctp_init, |
1123 | .register_app = sctp_register_app, | 1122 | .exit_netns = __ip_vs_sctp_exit, |
1123 | .register_app = sctp_register_app, | ||
1124 | .unregister_app = sctp_unregister_app, | 1124 | .unregister_app = sctp_unregister_app, |
1125 | .conn_schedule = sctp_conn_schedule, | 1125 | .conn_schedule = sctp_conn_schedule, |
1126 | .conn_in_get = ip_vs_conn_in_get_proto, | 1126 | .conn_in_get = ip_vs_conn_in_get_proto, |
1127 | .conn_out_get = ip_vs_conn_out_get_proto, | 1127 | .conn_out_get = ip_vs_conn_out_get_proto, |
1128 | .snat_handler = sctp_snat_handler, | 1128 | .snat_handler = sctp_snat_handler, |
1129 | .dnat_handler = sctp_dnat_handler, | 1129 | .dnat_handler = sctp_dnat_handler, |
1130 | .csum_check = sctp_csum_check, | 1130 | .csum_check = sctp_csum_check, |
1131 | .state_name = sctp_state_name, | 1131 | .state_name = sctp_state_name, |
1132 | .state_transition = sctp_state_transition, | 1132 | .state_transition = sctp_state_transition, |
1133 | .app_conn_bind = sctp_app_conn_bind, | 1133 | .app_conn_bind = sctp_app_conn_bind, |
1134 | .debug_packet = ip_vs_tcpudp_debug_packet, | 1134 | .debug_packet = ip_vs_tcpudp_debug_packet, |
1135 | .timeout_change = sctp_timeout_change, | 1135 | .timeout_change = NULL, |
1136 | .set_state_timeout = sctp_set_state_timeout, | ||
1137 | }; | 1136 | }; |
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index f6c5200e2146..c0cc341b840d 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c | |||
@@ -9,8 +9,12 @@ | |||
9 | * as published by the Free Software Foundation; either version | 9 | * as published by the Free Software Foundation; either version |
10 | * 2 of the License, or (at your option) any later version. | 10 | * 2 of the License, or (at your option) any later version. |
11 | * | 11 | * |
12 | * Changes: | 12 | * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com> |
13 | * | 13 | * |
14 | * Network name space (netns) aware. | ||
15 | * Global data moved to netns i.e struct netns_ipvs | ||
16 | * tcp_timeouts table has copy per netns in a hash table per | ||
17 | * protocol ip_vs_proto_data and is handled by netns | ||
14 | */ | 18 | */ |
15 | 19 | ||
16 | #define KMSG_COMPONENT "IPVS" | 20 | #define KMSG_COMPONENT "IPVS" |
@@ -28,9 +32,10 @@ | |||
28 | #include <net/ip_vs.h> | 32 | #include <net/ip_vs.h> |
29 | 33 | ||
30 | static int | 34 | static int |
31 | tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, | 35 | tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, |
32 | int *verdict, struct ip_vs_conn **cpp) | 36 | int *verdict, struct ip_vs_conn **cpp) |
33 | { | 37 | { |
38 | struct net *net; | ||
34 | struct ip_vs_service *svc; | 39 | struct ip_vs_service *svc; |
35 | struct tcphdr _tcph, *th; | 40 | struct tcphdr _tcph, *th; |
36 | struct ip_vs_iphdr iph; | 41 | struct ip_vs_iphdr iph; |
@@ -42,14 +47,14 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
42 | *verdict = NF_DROP; | 47 | *verdict = NF_DROP; |
43 | return 0; | 48 | return 0; |
44 | } | 49 | } |
45 | 50 | net = skb_net(skb); | |
46 | /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */ | 51 | /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */ |
47 | if (th->syn && | 52 | if (th->syn && |
48 | (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr, | 53 | (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol, |
49 | th->dest))) { | 54 | &iph.daddr, th->dest))) { |
50 | int ignored; | 55 | int ignored; |
51 | 56 | ||
52 | if (ip_vs_todrop()) { | 57 | if (ip_vs_todrop(net_ipvs(net))) { |
53 | /* | 58 | /* |
54 | * It seems that we are very loaded. | 59 | * It seems that we are very loaded. |
55 | * We have to drop this packet :( | 60 | * We have to drop this packet :( |
@@ -63,13 +68,19 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
63 | * Let the virtual server select a real server for the | 68 | * Let the virtual server select a real server for the |
64 | * incoming connection, and create a connection entry. | 69 | * incoming connection, and create a connection entry. |
65 | */ | 70 | */ |
66 | *cpp = ip_vs_schedule(svc, skb, pp, &ignored); | 71 | *cpp = ip_vs_schedule(svc, skb, pd, &ignored); |
67 | if (!*cpp && !ignored) { | 72 | if (!*cpp && ignored <= 0) { |
68 | *verdict = ip_vs_leave(svc, skb, pp); | 73 | if (!ignored) |
74 | *verdict = ip_vs_leave(svc, skb, pd); | ||
75 | else { | ||
76 | ip_vs_service_put(svc); | ||
77 | *verdict = NF_DROP; | ||
78 | } | ||
69 | return 0; | 79 | return 0; |
70 | } | 80 | } |
71 | ip_vs_service_put(svc); | 81 | ip_vs_service_put(svc); |
72 | } | 82 | } |
83 | /* NF_ACCEPT */ | ||
73 | return 1; | 84 | return 1; |
74 | } | 85 | } |
75 | 86 | ||
@@ -338,7 +349,7 @@ static const int tcp_state_off[IP_VS_DIR_LAST] = { | |||
338 | /* | 349 | /* |
339 | * Timeout table[state] | 350 | * Timeout table[state] |
340 | */ | 351 | */ |
341 | static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = { | 352 | static const int tcp_timeouts[IP_VS_TCP_S_LAST+1] = { |
342 | [IP_VS_TCP_S_NONE] = 2*HZ, | 353 | [IP_VS_TCP_S_NONE] = 2*HZ, |
343 | [IP_VS_TCP_S_ESTABLISHED] = 15*60*HZ, | 354 | [IP_VS_TCP_S_ESTABLISHED] = 15*60*HZ, |
344 | [IP_VS_TCP_S_SYN_SENT] = 2*60*HZ, | 355 | [IP_VS_TCP_S_SYN_SENT] = 2*60*HZ, |
@@ -437,10 +448,7 @@ static struct tcp_states_t tcp_states_dos [] = { | |||
437 | /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }}, | 448 | /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }}, |
438 | }; | 449 | }; |
439 | 450 | ||
440 | static struct tcp_states_t *tcp_state_table = tcp_states; | 451 | static void tcp_timeout_change(struct ip_vs_proto_data *pd, int flags) |
441 | |||
442 | |||
443 | static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags) | ||
444 | { | 452 | { |
445 | int on = (flags & 1); /* secure_tcp */ | 453 | int on = (flags & 1); /* secure_tcp */ |
446 | 454 | ||
@@ -450,14 +458,7 @@ static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags) | |||
450 | ** for most if not for all of the applications. Something | 458 | ** for most if not for all of the applications. Something |
451 | ** like "capabilities" (flags) for each object. | 459 | ** like "capabilities" (flags) for each object. |
452 | */ | 460 | */ |
453 | tcp_state_table = (on? tcp_states_dos : tcp_states); | 461 | pd->tcp_state_table = (on ? tcp_states_dos : tcp_states); |
454 | } | ||
455 | |||
456 | static int | ||
457 | tcp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to) | ||
458 | { | ||
459 | return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_TCP_S_LAST, | ||
460 | tcp_state_name_table, sname, to); | ||
461 | } | 462 | } |
462 | 463 | ||
463 | static inline int tcp_state_idx(struct tcphdr *th) | 464 | static inline int tcp_state_idx(struct tcphdr *th) |
@@ -474,7 +475,7 @@ static inline int tcp_state_idx(struct tcphdr *th) | |||
474 | } | 475 | } |
475 | 476 | ||
476 | static inline void | 477 | static inline void |
477 | set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, | 478 | set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, |
478 | int direction, struct tcphdr *th) | 479 | int direction, struct tcphdr *th) |
479 | { | 480 | { |
480 | int state_idx; | 481 | int state_idx; |
@@ -497,7 +498,8 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, | |||
497 | goto tcp_state_out; | 498 | goto tcp_state_out; |
498 | } | 499 | } |
499 | 500 | ||
500 | new_state = tcp_state_table[state_off+state_idx].next_state[cp->state]; | 501 | new_state = |
502 | pd->tcp_state_table[state_off+state_idx].next_state[cp->state]; | ||
501 | 503 | ||
502 | tcp_state_out: | 504 | tcp_state_out: |
503 | if (new_state != cp->state) { | 505 | if (new_state != cp->state) { |
@@ -505,7 +507,7 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, | |||
505 | 507 | ||
506 | IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->" | 508 | IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->" |
507 | "%s:%d state: %s->%s conn->refcnt:%d\n", | 509 | "%s:%d state: %s->%s conn->refcnt:%d\n", |
508 | pp->name, | 510 | pd->pp->name, |
509 | ((state_off == TCP_DIR_OUTPUT) ? | 511 | ((state_off == TCP_DIR_OUTPUT) ? |
510 | "output " : "input "), | 512 | "output " : "input "), |
511 | th->syn ? 'S' : '.', | 513 | th->syn ? 'S' : '.', |
@@ -535,17 +537,19 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, | |||
535 | } | 537 | } |
536 | } | 538 | } |
537 | 539 | ||
538 | cp->timeout = pp->timeout_table[cp->state = new_state]; | 540 | if (likely(pd)) |
541 | cp->timeout = pd->timeout_table[cp->state = new_state]; | ||
542 | else /* What to do ? */ | ||
543 | cp->timeout = tcp_timeouts[cp->state = new_state]; | ||
539 | } | 544 | } |
540 | 545 | ||
541 | |||
542 | /* | 546 | /* |
543 | * Handle state transitions | 547 | * Handle state transitions |
544 | */ | 548 | */ |
545 | static int | 549 | static int |
546 | tcp_state_transition(struct ip_vs_conn *cp, int direction, | 550 | tcp_state_transition(struct ip_vs_conn *cp, int direction, |
547 | const struct sk_buff *skb, | 551 | const struct sk_buff *skb, |
548 | struct ip_vs_protocol *pp) | 552 | struct ip_vs_proto_data *pd) |
549 | { | 553 | { |
550 | struct tcphdr _tcph, *th; | 554 | struct tcphdr _tcph, *th; |
551 | 555 | ||
@@ -560,23 +564,12 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction, | |||
560 | return 0; | 564 | return 0; |
561 | 565 | ||
562 | spin_lock(&cp->lock); | 566 | spin_lock(&cp->lock); |
563 | set_tcp_state(pp, cp, direction, th); | 567 | set_tcp_state(pd, cp, direction, th); |
564 | spin_unlock(&cp->lock); | 568 | spin_unlock(&cp->lock); |
565 | 569 | ||
566 | return 1; | 570 | return 1; |
567 | } | 571 | } |
568 | 572 | ||
569 | |||
570 | /* | ||
571 | * Hash table for TCP application incarnations | ||
572 | */ | ||
573 | #define TCP_APP_TAB_BITS 4 | ||
574 | #define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS) | ||
575 | #define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1) | ||
576 | |||
577 | static struct list_head tcp_apps[TCP_APP_TAB_SIZE]; | ||
578 | static DEFINE_SPINLOCK(tcp_app_lock); | ||
579 | |||
580 | static inline __u16 tcp_app_hashkey(__be16 port) | 573 | static inline __u16 tcp_app_hashkey(__be16 port) |
581 | { | 574 | { |
582 | return (((__force u16)port >> TCP_APP_TAB_BITS) ^ (__force u16)port) | 575 | return (((__force u16)port >> TCP_APP_TAB_BITS) ^ (__force u16)port) |
@@ -584,44 +577,50 @@ static inline __u16 tcp_app_hashkey(__be16 port) | |||
584 | } | 577 | } |
585 | 578 | ||
586 | 579 | ||
587 | static int tcp_register_app(struct ip_vs_app *inc) | 580 | static int tcp_register_app(struct net *net, struct ip_vs_app *inc) |
588 | { | 581 | { |
589 | struct ip_vs_app *i; | 582 | struct ip_vs_app *i; |
590 | __u16 hash; | 583 | __u16 hash; |
591 | __be16 port = inc->port; | 584 | __be16 port = inc->port; |
592 | int ret = 0; | 585 | int ret = 0; |
586 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
587 | struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); | ||
593 | 588 | ||
594 | hash = tcp_app_hashkey(port); | 589 | hash = tcp_app_hashkey(port); |
595 | 590 | ||
596 | spin_lock_bh(&tcp_app_lock); | 591 | spin_lock_bh(&ipvs->tcp_app_lock); |
597 | list_for_each_entry(i, &tcp_apps[hash], p_list) { | 592 | list_for_each_entry(i, &ipvs->tcp_apps[hash], p_list) { |
598 | if (i->port == port) { | 593 | if (i->port == port) { |
599 | ret = -EEXIST; | 594 | ret = -EEXIST; |
600 | goto out; | 595 | goto out; |
601 | } | 596 | } |
602 | } | 597 | } |
603 | list_add(&inc->p_list, &tcp_apps[hash]); | 598 | list_add(&inc->p_list, &ipvs->tcp_apps[hash]); |
604 | atomic_inc(&ip_vs_protocol_tcp.appcnt); | 599 | atomic_inc(&pd->appcnt); |
605 | 600 | ||
606 | out: | 601 | out: |
607 | spin_unlock_bh(&tcp_app_lock); | 602 | spin_unlock_bh(&ipvs->tcp_app_lock); |
608 | return ret; | 603 | return ret; |
609 | } | 604 | } |
610 | 605 | ||
611 | 606 | ||
612 | static void | 607 | static void |
613 | tcp_unregister_app(struct ip_vs_app *inc) | 608 | tcp_unregister_app(struct net *net, struct ip_vs_app *inc) |
614 | { | 609 | { |
615 | spin_lock_bh(&tcp_app_lock); | 610 | struct netns_ipvs *ipvs = net_ipvs(net); |
616 | atomic_dec(&ip_vs_protocol_tcp.appcnt); | 611 | struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); |
612 | |||
613 | spin_lock_bh(&ipvs->tcp_app_lock); | ||
614 | atomic_dec(&pd->appcnt); | ||
617 | list_del(&inc->p_list); | 615 | list_del(&inc->p_list); |
618 | spin_unlock_bh(&tcp_app_lock); | 616 | spin_unlock_bh(&ipvs->tcp_app_lock); |
619 | } | 617 | } |
620 | 618 | ||
621 | 619 | ||
622 | static int | 620 | static int |
623 | tcp_app_conn_bind(struct ip_vs_conn *cp) | 621 | tcp_app_conn_bind(struct ip_vs_conn *cp) |
624 | { | 622 | { |
623 | struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); | ||
625 | int hash; | 624 | int hash; |
626 | struct ip_vs_app *inc; | 625 | struct ip_vs_app *inc; |
627 | int result = 0; | 626 | int result = 0; |
@@ -633,12 +632,12 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) | |||
633 | /* Lookup application incarnations and bind the right one */ | 632 | /* Lookup application incarnations and bind the right one */ |
634 | hash = tcp_app_hashkey(cp->vport); | 633 | hash = tcp_app_hashkey(cp->vport); |
635 | 634 | ||
636 | spin_lock(&tcp_app_lock); | 635 | spin_lock(&ipvs->tcp_app_lock); |
637 | list_for_each_entry(inc, &tcp_apps[hash], p_list) { | 636 | list_for_each_entry(inc, &ipvs->tcp_apps[hash], p_list) { |
638 | if (inc->port == cp->vport) { | 637 | if (inc->port == cp->vport) { |
639 | if (unlikely(!ip_vs_app_inc_get(inc))) | 638 | if (unlikely(!ip_vs_app_inc_get(inc))) |
640 | break; | 639 | break; |
641 | spin_unlock(&tcp_app_lock); | 640 | spin_unlock(&ipvs->tcp_app_lock); |
642 | 641 | ||
643 | IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" | 642 | IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" |
644 | "%s:%u to app %s on port %u\n", | 643 | "%s:%u to app %s on port %u\n", |
@@ -655,7 +654,7 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) | |||
655 | goto out; | 654 | goto out; |
656 | } | 655 | } |
657 | } | 656 | } |
658 | spin_unlock(&tcp_app_lock); | 657 | spin_unlock(&ipvs->tcp_app_lock); |
659 | 658 | ||
660 | out: | 659 | out: |
661 | return result; | 660 | return result; |
@@ -665,24 +664,35 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) | |||
665 | /* | 664 | /* |
666 | * Set LISTEN timeout. (ip_vs_conn_put will setup timer) | 665 | * Set LISTEN timeout. (ip_vs_conn_put will setup timer) |
667 | */ | 666 | */ |
668 | void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp) | 667 | void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp) |
669 | { | 668 | { |
669 | struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); | ||
670 | |||
670 | spin_lock(&cp->lock); | 671 | spin_lock(&cp->lock); |
671 | cp->state = IP_VS_TCP_S_LISTEN; | 672 | cp->state = IP_VS_TCP_S_LISTEN; |
672 | cp->timeout = ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_LISTEN]; | 673 | cp->timeout = (pd ? pd->timeout_table[IP_VS_TCP_S_LISTEN] |
674 | : tcp_timeouts[IP_VS_TCP_S_LISTEN]); | ||
673 | spin_unlock(&cp->lock); | 675 | spin_unlock(&cp->lock); |
674 | } | 676 | } |
675 | 677 | ||
676 | 678 | /* --------------------------------------------- | |
677 | static void ip_vs_tcp_init(struct ip_vs_protocol *pp) | 679 | * timeouts is netns related now. |
680 | * --------------------------------------------- | ||
681 | */ | ||
682 | static void __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd) | ||
678 | { | 683 | { |
679 | IP_VS_INIT_HASH_TABLE(tcp_apps); | 684 | struct netns_ipvs *ipvs = net_ipvs(net); |
680 | pp->timeout_table = tcp_timeouts; | ||
681 | } | ||
682 | 685 | ||
686 | ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE); | ||
687 | spin_lock_init(&ipvs->tcp_app_lock); | ||
688 | pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts, | ||
689 | sizeof(tcp_timeouts)); | ||
690 | pd->tcp_state_table = tcp_states; | ||
691 | } | ||
683 | 692 | ||
684 | static void ip_vs_tcp_exit(struct ip_vs_protocol *pp) | 693 | static void __ip_vs_tcp_exit(struct net *net, struct ip_vs_proto_data *pd) |
685 | { | 694 | { |
695 | kfree(pd->timeout_table); | ||
686 | } | 696 | } |
687 | 697 | ||
688 | 698 | ||
@@ -691,9 +701,10 @@ struct ip_vs_protocol ip_vs_protocol_tcp = { | |||
691 | .protocol = IPPROTO_TCP, | 701 | .protocol = IPPROTO_TCP, |
692 | .num_states = IP_VS_TCP_S_LAST, | 702 | .num_states = IP_VS_TCP_S_LAST, |
693 | .dont_defrag = 0, | 703 | .dont_defrag = 0, |
694 | .appcnt = ATOMIC_INIT(0), | 704 | .init = NULL, |
695 | .init = ip_vs_tcp_init, | 705 | .exit = NULL, |
696 | .exit = ip_vs_tcp_exit, | 706 | .init_netns = __ip_vs_tcp_init, |
707 | .exit_netns = __ip_vs_tcp_exit, | ||
697 | .register_app = tcp_register_app, | 708 | .register_app = tcp_register_app, |
698 | .unregister_app = tcp_unregister_app, | 709 | .unregister_app = tcp_unregister_app, |
699 | .conn_schedule = tcp_conn_schedule, | 710 | .conn_schedule = tcp_conn_schedule, |
@@ -707,5 +718,4 @@ struct ip_vs_protocol ip_vs_protocol_tcp = { | |||
707 | .app_conn_bind = tcp_app_conn_bind, | 718 | .app_conn_bind = tcp_app_conn_bind, |
708 | .debug_packet = ip_vs_tcpudp_debug_packet, | 719 | .debug_packet = ip_vs_tcpudp_debug_packet, |
709 | .timeout_change = tcp_timeout_change, | 720 | .timeout_change = tcp_timeout_change, |
710 | .set_state_timeout = tcp_set_state_timeout, | ||
711 | }; | 721 | }; |
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index 9d106a06bb0a..f1282cbe6fe3 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c | |||
@@ -9,7 +9,8 @@ | |||
9 | * as published by the Free Software Foundation; either version | 9 | * as published by the Free Software Foundation; either version |
10 | * 2 of the License, or (at your option) any later version. | 10 | * 2 of the License, or (at your option) any later version. |
11 | * | 11 | * |
12 | * Changes: | 12 | * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com> |
13 | * Network name space (netns) aware. | ||
13 | * | 14 | * |
14 | */ | 15 | */ |
15 | 16 | ||
@@ -28,9 +29,10 @@ | |||
28 | #include <net/ip6_checksum.h> | 29 | #include <net/ip6_checksum.h> |
29 | 30 | ||
30 | static int | 31 | static int |
31 | udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, | 32 | udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, |
32 | int *verdict, struct ip_vs_conn **cpp) | 33 | int *verdict, struct ip_vs_conn **cpp) |
33 | { | 34 | { |
35 | struct net *net; | ||
34 | struct ip_vs_service *svc; | 36 | struct ip_vs_service *svc; |
35 | struct udphdr _udph, *uh; | 37 | struct udphdr _udph, *uh; |
36 | struct ip_vs_iphdr iph; | 38 | struct ip_vs_iphdr iph; |
@@ -42,13 +44,13 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
42 | *verdict = NF_DROP; | 44 | *verdict = NF_DROP; |
43 | return 0; | 45 | return 0; |
44 | } | 46 | } |
45 | 47 | net = skb_net(skb); | |
46 | svc = ip_vs_service_get(af, skb->mark, iph.protocol, | 48 | svc = ip_vs_service_get(net, af, skb->mark, iph.protocol, |
47 | &iph.daddr, uh->dest); | 49 | &iph.daddr, uh->dest); |
48 | if (svc) { | 50 | if (svc) { |
49 | int ignored; | 51 | int ignored; |
50 | 52 | ||
51 | if (ip_vs_todrop()) { | 53 | if (ip_vs_todrop(net_ipvs(net))) { |
52 | /* | 54 | /* |
53 | * It seems that we are very loaded. | 55 | * It seems that we are very loaded. |
54 | * We have to drop this packet :( | 56 | * We have to drop this packet :( |
@@ -62,13 +64,19 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
62 | * Let the virtual server select a real server for the | 64 | * Let the virtual server select a real server for the |
63 | * incoming connection, and create a connection entry. | 65 | * incoming connection, and create a connection entry. |
64 | */ | 66 | */ |
65 | *cpp = ip_vs_schedule(svc, skb, pp, &ignored); | 67 | *cpp = ip_vs_schedule(svc, skb, pd, &ignored); |
66 | if (!*cpp && !ignored) { | 68 | if (!*cpp && ignored <= 0) { |
67 | *verdict = ip_vs_leave(svc, skb, pp); | 69 | if (!ignored) |
70 | *verdict = ip_vs_leave(svc, skb, pd); | ||
71 | else { | ||
72 | ip_vs_service_put(svc); | ||
73 | *verdict = NF_DROP; | ||
74 | } | ||
68 | return 0; | 75 | return 0; |
69 | } | 76 | } |
70 | ip_vs_service_put(svc); | 77 | ip_vs_service_put(svc); |
71 | } | 78 | } |
79 | /* NF_ACCEPT */ | ||
72 | return 1; | 80 | return 1; |
73 | } | 81 | } |
74 | 82 | ||
@@ -338,19 +346,6 @@ udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) | |||
338 | return 1; | 346 | return 1; |
339 | } | 347 | } |
340 | 348 | ||
341 | |||
342 | /* | ||
343 | * Note: the caller guarantees that only one of register_app, | ||
344 | * unregister_app or app_conn_bind is called each time. | ||
345 | */ | ||
346 | |||
347 | #define UDP_APP_TAB_BITS 4 | ||
348 | #define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS) | ||
349 | #define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1) | ||
350 | |||
351 | static struct list_head udp_apps[UDP_APP_TAB_SIZE]; | ||
352 | static DEFINE_SPINLOCK(udp_app_lock); | ||
353 | |||
354 | static inline __u16 udp_app_hashkey(__be16 port) | 349 | static inline __u16 udp_app_hashkey(__be16 port) |
355 | { | 350 | { |
356 | return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port) | 351 | return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port) |
@@ -358,44 +353,50 @@ static inline __u16 udp_app_hashkey(__be16 port) | |||
358 | } | 353 | } |
359 | 354 | ||
360 | 355 | ||
361 | static int udp_register_app(struct ip_vs_app *inc) | 356 | static int udp_register_app(struct net *net, struct ip_vs_app *inc) |
362 | { | 357 | { |
363 | struct ip_vs_app *i; | 358 | struct ip_vs_app *i; |
364 | __u16 hash; | 359 | __u16 hash; |
365 | __be16 port = inc->port; | 360 | __be16 port = inc->port; |
366 | int ret = 0; | 361 | int ret = 0; |
362 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
363 | struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP); | ||
367 | 364 | ||
368 | hash = udp_app_hashkey(port); | 365 | hash = udp_app_hashkey(port); |
369 | 366 | ||
370 | 367 | ||
371 | spin_lock_bh(&udp_app_lock); | 368 | spin_lock_bh(&ipvs->udp_app_lock); |
372 | list_for_each_entry(i, &udp_apps[hash], p_list) { | 369 | list_for_each_entry(i, &ipvs->udp_apps[hash], p_list) { |
373 | if (i->port == port) { | 370 | if (i->port == port) { |
374 | ret = -EEXIST; | 371 | ret = -EEXIST; |
375 | goto out; | 372 | goto out; |
376 | } | 373 | } |
377 | } | 374 | } |
378 | list_add(&inc->p_list, &udp_apps[hash]); | 375 | list_add(&inc->p_list, &ipvs->udp_apps[hash]); |
379 | atomic_inc(&ip_vs_protocol_udp.appcnt); | 376 | atomic_inc(&pd->appcnt); |
380 | 377 | ||
381 | out: | 378 | out: |
382 | spin_unlock_bh(&udp_app_lock); | 379 | spin_unlock_bh(&ipvs->udp_app_lock); |
383 | return ret; | 380 | return ret; |
384 | } | 381 | } |
385 | 382 | ||
386 | 383 | ||
387 | static void | 384 | static void |
388 | udp_unregister_app(struct ip_vs_app *inc) | 385 | udp_unregister_app(struct net *net, struct ip_vs_app *inc) |
389 | { | 386 | { |
390 | spin_lock_bh(&udp_app_lock); | 387 | struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP); |
391 | atomic_dec(&ip_vs_protocol_udp.appcnt); | 388 | struct netns_ipvs *ipvs = net_ipvs(net); |
389 | |||
390 | spin_lock_bh(&ipvs->udp_app_lock); | ||
391 | atomic_dec(&pd->appcnt); | ||
392 | list_del(&inc->p_list); | 392 | list_del(&inc->p_list); |
393 | spin_unlock_bh(&udp_app_lock); | 393 | spin_unlock_bh(&ipvs->udp_app_lock); |
394 | } | 394 | } |
395 | 395 | ||
396 | 396 | ||
397 | static int udp_app_conn_bind(struct ip_vs_conn *cp) | 397 | static int udp_app_conn_bind(struct ip_vs_conn *cp) |
398 | { | 398 | { |
399 | struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); | ||
399 | int hash; | 400 | int hash; |
400 | struct ip_vs_app *inc; | 401 | struct ip_vs_app *inc; |
401 | int result = 0; | 402 | int result = 0; |
@@ -407,12 +408,12 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp) | |||
407 | /* Lookup application incarnations and bind the right one */ | 408 | /* Lookup application incarnations and bind the right one */ |
408 | hash = udp_app_hashkey(cp->vport); | 409 | hash = udp_app_hashkey(cp->vport); |
409 | 410 | ||
410 | spin_lock(&udp_app_lock); | 411 | spin_lock(&ipvs->udp_app_lock); |
411 | list_for_each_entry(inc, &udp_apps[hash], p_list) { | 412 | list_for_each_entry(inc, &ipvs->udp_apps[hash], p_list) { |
412 | if (inc->port == cp->vport) { | 413 | if (inc->port == cp->vport) { |
413 | if (unlikely(!ip_vs_app_inc_get(inc))) | 414 | if (unlikely(!ip_vs_app_inc_get(inc))) |
414 | break; | 415 | break; |
415 | spin_unlock(&udp_app_lock); | 416 | spin_unlock(&ipvs->udp_app_lock); |
416 | 417 | ||
417 | IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" | 418 | IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" |
418 | "%s:%u to app %s on port %u\n", | 419 | "%s:%u to app %s on port %u\n", |
@@ -429,14 +430,14 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp) | |||
429 | goto out; | 430 | goto out; |
430 | } | 431 | } |
431 | } | 432 | } |
432 | spin_unlock(&udp_app_lock); | 433 | spin_unlock(&ipvs->udp_app_lock); |
433 | 434 | ||
434 | out: | 435 | out: |
435 | return result; | 436 | return result; |
436 | } | 437 | } |
437 | 438 | ||
438 | 439 | ||
439 | static int udp_timeouts[IP_VS_UDP_S_LAST+1] = { | 440 | static const int udp_timeouts[IP_VS_UDP_S_LAST+1] = { |
440 | [IP_VS_UDP_S_NORMAL] = 5*60*HZ, | 441 | [IP_VS_UDP_S_NORMAL] = 5*60*HZ, |
441 | [IP_VS_UDP_S_LAST] = 2*HZ, | 442 | [IP_VS_UDP_S_LAST] = 2*HZ, |
442 | }; | 443 | }; |
@@ -446,14 +447,6 @@ static const char *const udp_state_name_table[IP_VS_UDP_S_LAST+1] = { | |||
446 | [IP_VS_UDP_S_LAST] = "BUG!", | 447 | [IP_VS_UDP_S_LAST] = "BUG!", |
447 | }; | 448 | }; |
448 | 449 | ||
449 | |||
450 | static int | ||
451 | udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to) | ||
452 | { | ||
453 | return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST, | ||
454 | udp_state_name_table, sname, to); | ||
455 | } | ||
456 | |||
457 | static const char * udp_state_name(int state) | 450 | static const char * udp_state_name(int state) |
458 | { | 451 | { |
459 | if (state >= IP_VS_UDP_S_LAST) | 452 | if (state >= IP_VS_UDP_S_LAST) |
@@ -464,20 +457,30 @@ static const char * udp_state_name(int state) | |||
464 | static int | 457 | static int |
465 | udp_state_transition(struct ip_vs_conn *cp, int direction, | 458 | udp_state_transition(struct ip_vs_conn *cp, int direction, |
466 | const struct sk_buff *skb, | 459 | const struct sk_buff *skb, |
467 | struct ip_vs_protocol *pp) | 460 | struct ip_vs_proto_data *pd) |
468 | { | 461 | { |
469 | cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL]; | 462 | if (unlikely(!pd)) { |
463 | pr_err("UDP no ns data\n"); | ||
464 | return 0; | ||
465 | } | ||
466 | |||
467 | cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL]; | ||
470 | return 1; | 468 | return 1; |
471 | } | 469 | } |
472 | 470 | ||
473 | static void udp_init(struct ip_vs_protocol *pp) | 471 | static void __udp_init(struct net *net, struct ip_vs_proto_data *pd) |
474 | { | 472 | { |
475 | IP_VS_INIT_HASH_TABLE(udp_apps); | 473 | struct netns_ipvs *ipvs = net_ipvs(net); |
476 | pp->timeout_table = udp_timeouts; | 474 | |
475 | ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE); | ||
476 | spin_lock_init(&ipvs->udp_app_lock); | ||
477 | pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts, | ||
478 | sizeof(udp_timeouts)); | ||
477 | } | 479 | } |
478 | 480 | ||
479 | static void udp_exit(struct ip_vs_protocol *pp) | 481 | static void __udp_exit(struct net *net, struct ip_vs_proto_data *pd) |
480 | { | 482 | { |
483 | kfree(pd->timeout_table); | ||
481 | } | 484 | } |
482 | 485 | ||
483 | 486 | ||
@@ -486,8 +489,10 @@ struct ip_vs_protocol ip_vs_protocol_udp = { | |||
486 | .protocol = IPPROTO_UDP, | 489 | .protocol = IPPROTO_UDP, |
487 | .num_states = IP_VS_UDP_S_LAST, | 490 | .num_states = IP_VS_UDP_S_LAST, |
488 | .dont_defrag = 0, | 491 | .dont_defrag = 0, |
489 | .init = udp_init, | 492 | .init = NULL, |
490 | .exit = udp_exit, | 493 | .exit = NULL, |
494 | .init_netns = __udp_init, | ||
495 | .exit_netns = __udp_exit, | ||
491 | .conn_schedule = udp_conn_schedule, | 496 | .conn_schedule = udp_conn_schedule, |
492 | .conn_in_get = ip_vs_conn_in_get_proto, | 497 | .conn_in_get = ip_vs_conn_in_get_proto, |
493 | .conn_out_get = ip_vs_conn_out_get_proto, | 498 | .conn_out_get = ip_vs_conn_out_get_proto, |
@@ -501,5 +506,4 @@ struct ip_vs_protocol ip_vs_protocol_udp = { | |||
501 | .app_conn_bind = udp_app_conn_bind, | 506 | .app_conn_bind = udp_app_conn_bind, |
502 | .debug_packet = ip_vs_tcpudp_debug_packet, | 507 | .debug_packet = ip_vs_tcpudp_debug_packet, |
503 | .timeout_change = NULL, | 508 | .timeout_change = NULL, |
504 | .set_state_timeout = udp_set_state_timeout, | ||
505 | }; | 509 | }; |
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index ab85aedea17e..d1adf988eb08 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c | |||
@@ -5,6 +5,18 @@ | |||
5 | * high-performance and highly available server based on a | 5 | * high-performance and highly available server based on a |
6 | * cluster of servers. | 6 | * cluster of servers. |
7 | * | 7 | * |
8 | * Version 1, is capable of handling both version 0 and 1 messages. | ||
9 | * Version 0 is the plain old format. | ||
10 | * Note Version 0 receivers will just drop Ver 1 messages. | ||
11 | * Version 1 is capable of handle IPv6, Persistence data, | ||
12 | * time-outs, and firewall marks. | ||
13 | * In ver.1 "ip_vs_sync_conn_options" will be sent in netw. order. | ||
14 | * Ver. 0 can be turned on by sysctl -w net.ipv4.vs.sync_version=0 | ||
15 | * | ||
16 | * Definitions Message: is a complete datagram | ||
17 | * Sync_conn: is a part of a Message | ||
18 | * Param Data is an option to a Sync_conn. | ||
19 | * | ||
8 | * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> | 20 | * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> |
9 | * | 21 | * |
10 | * ip_vs_sync: sync connection info from master load balancer to backups | 22 | * ip_vs_sync: sync connection info from master load balancer to backups |
@@ -15,6 +27,8 @@ | |||
15 | * Alexandre Cassen : Added SyncID support for incoming sync | 27 | * Alexandre Cassen : Added SyncID support for incoming sync |
16 | * messages filtering. | 28 | * messages filtering. |
17 | * Justin Ossevoort : Fix endian problem on sync message size. | 29 | * Justin Ossevoort : Fix endian problem on sync message size. |
30 | * Hans Schillstrom : Added Version 1: i.e. IPv6, | ||
31 | * Persistence support, fwmark and time-out. | ||
18 | */ | 32 | */ |
19 | 33 | ||
20 | #define KMSG_COMPONENT "IPVS" | 34 | #define KMSG_COMPONENT "IPVS" |
@@ -35,6 +49,8 @@ | |||
35 | #include <linux/wait.h> | 49 | #include <linux/wait.h> |
36 | #include <linux/kernel.h> | 50 | #include <linux/kernel.h> |
37 | 51 | ||
52 | #include <asm/unaligned.h> /* Used for ntoh_seq and hton_seq */ | ||
53 | |||
38 | #include <net/ip.h> | 54 | #include <net/ip.h> |
39 | #include <net/sock.h> | 55 | #include <net/sock.h> |
40 | 56 | ||
@@ -43,11 +59,13 @@ | |||
43 | #define IP_VS_SYNC_GROUP 0xe0000051 /* multicast addr - 224.0.0.81 */ | 59 | #define IP_VS_SYNC_GROUP 0xe0000051 /* multicast addr - 224.0.0.81 */ |
44 | #define IP_VS_SYNC_PORT 8848 /* multicast port */ | 60 | #define IP_VS_SYNC_PORT 8848 /* multicast port */ |
45 | 61 | ||
62 | #define SYNC_PROTO_VER 1 /* Protocol version in header */ | ||
46 | 63 | ||
47 | /* | 64 | /* |
48 | * IPVS sync connection entry | 65 | * IPVS sync connection entry |
66 | * Version 0, i.e. original version. | ||
49 | */ | 67 | */ |
50 | struct ip_vs_sync_conn { | 68 | struct ip_vs_sync_conn_v0 { |
51 | __u8 reserved; | 69 | __u8 reserved; |
52 | 70 | ||
53 | /* Protocol, addresses and port numbers */ | 71 | /* Protocol, addresses and port numbers */ |
@@ -71,41 +89,159 @@ struct ip_vs_sync_conn_options { | |||
71 | struct ip_vs_seq out_seq; /* outgoing seq. struct */ | 89 | struct ip_vs_seq out_seq; /* outgoing seq. struct */ |
72 | }; | 90 | }; |
73 | 91 | ||
92 | /* | ||
93 | Sync Connection format (sync_conn) | ||
94 | |||
95 | 0 1 2 3 | ||
96 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ||
97 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
98 | | Type | Protocol | Ver. | Size | | ||
99 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
100 | | Flags | | ||
101 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
102 | | State | cport | | ||
103 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
104 | | vport | dport | | ||
105 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
106 | | fwmark | | ||
107 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
108 | | timeout (in sec.) | | ||
109 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
110 | | ... | | ||
111 | | IP-Addresses (v4 or v6) | | ||
112 | | ... | | ||
113 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
114 | Optional Parameters. | ||
115 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
116 | | Param. Type | Param. Length | Param. data | | ||
117 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | | ||
118 | | ... | | ||
119 | | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
120 | | | Param Type | Param. Length | | ||
121 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
122 | | Param data | | ||
123 | | Last Param data should be padded for 32 bit alignment | | ||
124 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
125 | */ | ||
126 | |||
127 | /* | ||
128 | * Type 0, IPv4 sync connection format | ||
129 | */ | ||
130 | struct ip_vs_sync_v4 { | ||
131 | __u8 type; | ||
132 | __u8 protocol; /* Which protocol (TCP/UDP) */ | ||
133 | __be16 ver_size; /* Version msb 4 bits */ | ||
134 | /* Flags and state transition */ | ||
135 | __be32 flags; /* status flags */ | ||
136 | __be16 state; /* state info */ | ||
137 | /* Protocol, addresses and port numbers */ | ||
138 | __be16 cport; | ||
139 | __be16 vport; | ||
140 | __be16 dport; | ||
141 | __be32 fwmark; /* Firewall mark from skb */ | ||
142 | __be32 timeout; /* cp timeout */ | ||
143 | __be32 caddr; /* client address */ | ||
144 | __be32 vaddr; /* virtual address */ | ||
145 | __be32 daddr; /* destination address */ | ||
146 | /* The sequence options start here */ | ||
147 | /* PE data padded to 32bit alignment after seq. options */ | ||
148 | }; | ||
149 | /* | ||
150 | * Type 2 messages IPv6 | ||
151 | */ | ||
152 | struct ip_vs_sync_v6 { | ||
153 | __u8 type; | ||
154 | __u8 protocol; /* Which protocol (TCP/UDP) */ | ||
155 | __be16 ver_size; /* Version msb 4 bits */ | ||
156 | /* Flags and state transition */ | ||
157 | __be32 flags; /* status flags */ | ||
158 | __be16 state; /* state info */ | ||
159 | /* Protocol, addresses and port numbers */ | ||
160 | __be16 cport; | ||
161 | __be16 vport; | ||
162 | __be16 dport; | ||
163 | __be32 fwmark; /* Firewall mark from skb */ | ||
164 | __be32 timeout; /* cp timeout */ | ||
165 | struct in6_addr caddr; /* client address */ | ||
166 | struct in6_addr vaddr; /* virtual address */ | ||
167 | struct in6_addr daddr; /* destination address */ | ||
168 | /* The sequence options start here */ | ||
169 | /* PE data padded to 32bit alignment after seq. options */ | ||
170 | }; | ||
171 | |||
172 | union ip_vs_sync_conn { | ||
173 | struct ip_vs_sync_v4 v4; | ||
174 | struct ip_vs_sync_v6 v6; | ||
175 | }; | ||
176 | |||
177 | /* Bits in Type field in above */ | ||
178 | #define STYPE_INET6 0 | ||
179 | #define STYPE_F_INET6 (1 << STYPE_INET6) | ||
180 | |||
181 | #define SVER_SHIFT 12 /* Shift to get version */ | ||
182 | #define SVER_MASK 0x0fff /* Mask to strip version */ | ||
183 | |||
184 | #define IPVS_OPT_SEQ_DATA 1 | ||
185 | #define IPVS_OPT_PE_DATA 2 | ||
186 | #define IPVS_OPT_PE_NAME 3 | ||
187 | #define IPVS_OPT_PARAM 7 | ||
188 | |||
189 | #define IPVS_OPT_F_SEQ_DATA (1 << (IPVS_OPT_SEQ_DATA-1)) | ||
190 | #define IPVS_OPT_F_PE_DATA (1 << (IPVS_OPT_PE_DATA-1)) | ||
191 | #define IPVS_OPT_F_PE_NAME (1 << (IPVS_OPT_PE_NAME-1)) | ||
192 | #define IPVS_OPT_F_PARAM (1 << (IPVS_OPT_PARAM-1)) | ||
193 | |||
74 | struct ip_vs_sync_thread_data { | 194 | struct ip_vs_sync_thread_data { |
195 | struct net *net; | ||
75 | struct socket *sock; | 196 | struct socket *sock; |
76 | char *buf; | 197 | char *buf; |
77 | }; | 198 | }; |
78 | 199 | ||
79 | #define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn)) | 200 | /* Version 0 definition of packet sizes */ |
201 | #define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn_v0)) | ||
80 | #define FULL_CONN_SIZE \ | 202 | #define FULL_CONN_SIZE \ |
81 | (sizeof(struct ip_vs_sync_conn) + sizeof(struct ip_vs_sync_conn_options)) | 203 | (sizeof(struct ip_vs_sync_conn_v0) + sizeof(struct ip_vs_sync_conn_options)) |
82 | 204 | ||
83 | 205 | ||
84 | /* | 206 | /* |
85 | The master mulitcasts messages to the backup load balancers in the | 207 | The master mulitcasts messages (Datagrams) to the backup load balancers |
86 | following format. | 208 | in the following format. |
209 | |||
210 | Version 1: | ||
211 | Note, first byte should be Zero, so ver 0 receivers will drop the packet. | ||
87 | 212 | ||
88 | 0 1 2 3 | 213 | 0 1 2 3 |
89 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | 214 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 |
90 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | 215 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
91 | | Count Conns | SyncID | Size | | 216 | | 0 | SyncID | Size | |
217 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
218 | | Count Conns | Version | Reserved, set to Zero | | ||
92 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | 219 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
93 | | | | 220 | | | |
94 | | IPVS Sync Connection (1) | | 221 | | IPVS Sync Connection (1) | |
95 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | 222 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
96 | | . | | 223 | | . | |
97 | | . | | 224 | ~ . ~ |
98 | | . | | 225 | | . | |
99 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | 226 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
100 | | | | 227 | | | |
101 | | IPVS Sync Connection (n) | | 228 | | IPVS Sync Connection (n) | |
102 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | 229 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
230 | |||
231 | Version 0 Header | ||
232 | 0 1 2 3 | ||
233 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ||
234 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
235 | | Count Conns | SyncID | Size | | ||
236 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
237 | | IPVS Sync Connection (1) | | ||
103 | */ | 238 | */ |
104 | 239 | ||
105 | #define SYNC_MESG_HEADER_LEN 4 | 240 | #define SYNC_MESG_HEADER_LEN 4 |
106 | #define MAX_CONNS_PER_SYNCBUFF 255 /* nr_conns in ip_vs_sync_mesg is 8 bit */ | 241 | #define MAX_CONNS_PER_SYNCBUFF 255 /* nr_conns in ip_vs_sync_mesg is 8 bit */ |
107 | 242 | ||
108 | struct ip_vs_sync_mesg { | 243 | /* Version 0 header */ |
244 | struct ip_vs_sync_mesg_v0 { | ||
109 | __u8 nr_conns; | 245 | __u8 nr_conns; |
110 | __u8 syncid; | 246 | __u8 syncid; |
111 | __u16 size; | 247 | __u16 size; |
@@ -113,9 +249,16 @@ struct ip_vs_sync_mesg { | |||
113 | /* ip_vs_sync_conn entries start here */ | 249 | /* ip_vs_sync_conn entries start here */ |
114 | }; | 250 | }; |
115 | 251 | ||
116 | /* the maximum length of sync (sending/receiving) message */ | 252 | /* Version 1 header */ |
117 | static int sync_send_mesg_maxlen; | 253 | struct ip_vs_sync_mesg { |
118 | static int sync_recv_mesg_maxlen; | 254 | __u8 reserved; /* must be zero */ |
255 | __u8 syncid; | ||
256 | __u16 size; | ||
257 | __u8 nr_conns; | ||
258 | __s8 version; /* SYNC_PROTO_VER */ | ||
259 | __u16 spare; | ||
260 | /* ip_vs_sync_conn entries start here */ | ||
261 | }; | ||
119 | 262 | ||
120 | struct ip_vs_sync_buff { | 263 | struct ip_vs_sync_buff { |
121 | struct list_head list; | 264 | struct list_head list; |
@@ -127,28 +270,6 @@ struct ip_vs_sync_buff { | |||
127 | unsigned char *end; | 270 | unsigned char *end; |
128 | }; | 271 | }; |
129 | 272 | ||
130 | |||
131 | /* the sync_buff list head and the lock */ | ||
132 | static LIST_HEAD(ip_vs_sync_queue); | ||
133 | static DEFINE_SPINLOCK(ip_vs_sync_lock); | ||
134 | |||
135 | /* current sync_buff for accepting new conn entries */ | ||
136 | static struct ip_vs_sync_buff *curr_sb = NULL; | ||
137 | static DEFINE_SPINLOCK(curr_sb_lock); | ||
138 | |||
139 | /* ipvs sync daemon state */ | ||
140 | volatile int ip_vs_sync_state = IP_VS_STATE_NONE; | ||
141 | volatile int ip_vs_master_syncid = 0; | ||
142 | volatile int ip_vs_backup_syncid = 0; | ||
143 | |||
144 | /* multicast interface name */ | ||
145 | char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN]; | ||
146 | char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN]; | ||
147 | |||
148 | /* sync daemon tasks */ | ||
149 | static struct task_struct *sync_master_thread; | ||
150 | static struct task_struct *sync_backup_thread; | ||
151 | |||
152 | /* multicast addr */ | 273 | /* multicast addr */ |
153 | static struct sockaddr_in mcast_addr = { | 274 | static struct sockaddr_in mcast_addr = { |
154 | .sin_family = AF_INET, | 275 | .sin_family = AF_INET, |
@@ -156,41 +277,71 @@ static struct sockaddr_in mcast_addr = { | |||
156 | .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP), | 277 | .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP), |
157 | }; | 278 | }; |
158 | 279 | ||
280 | /* | ||
281 | * Copy of struct ip_vs_seq | ||
282 | * From unaligned network order to aligned host order | ||
283 | */ | ||
284 | static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho) | ||
285 | { | ||
286 | ho->init_seq = get_unaligned_be32(&no->init_seq); | ||
287 | ho->delta = get_unaligned_be32(&no->delta); | ||
288 | ho->previous_delta = get_unaligned_be32(&no->previous_delta); | ||
289 | } | ||
290 | |||
291 | /* | ||
292 | * Copy of struct ip_vs_seq | ||
293 | * From Aligned host order to unaligned network order | ||
294 | */ | ||
295 | static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no) | ||
296 | { | ||
297 | put_unaligned_be32(ho->init_seq, &no->init_seq); | ||
298 | put_unaligned_be32(ho->delta, &no->delta); | ||
299 | put_unaligned_be32(ho->previous_delta, &no->previous_delta); | ||
300 | } | ||
159 | 301 | ||
160 | static inline struct ip_vs_sync_buff *sb_dequeue(void) | 302 | static inline struct ip_vs_sync_buff *sb_dequeue(struct netns_ipvs *ipvs) |
161 | { | 303 | { |
162 | struct ip_vs_sync_buff *sb; | 304 | struct ip_vs_sync_buff *sb; |
163 | 305 | ||
164 | spin_lock_bh(&ip_vs_sync_lock); | 306 | spin_lock_bh(&ipvs->sync_lock); |
165 | if (list_empty(&ip_vs_sync_queue)) { | 307 | if (list_empty(&ipvs->sync_queue)) { |
166 | sb = NULL; | 308 | sb = NULL; |
167 | } else { | 309 | } else { |
168 | sb = list_entry(ip_vs_sync_queue.next, | 310 | sb = list_entry(ipvs->sync_queue.next, |
169 | struct ip_vs_sync_buff, | 311 | struct ip_vs_sync_buff, |
170 | list); | 312 | list); |
171 | list_del(&sb->list); | 313 | list_del(&sb->list); |
172 | } | 314 | } |
173 | spin_unlock_bh(&ip_vs_sync_lock); | 315 | spin_unlock_bh(&ipvs->sync_lock); |
174 | 316 | ||
175 | return sb; | 317 | return sb; |
176 | } | 318 | } |
177 | 319 | ||
178 | static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(void) | 320 | /* |
321 | * Create a new sync buffer for Version 1 proto. | ||
322 | */ | ||
323 | static inline struct ip_vs_sync_buff * | ||
324 | ip_vs_sync_buff_create(struct netns_ipvs *ipvs) | ||
179 | { | 325 | { |
180 | struct ip_vs_sync_buff *sb; | 326 | struct ip_vs_sync_buff *sb; |
181 | 327 | ||
182 | if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC))) | 328 | if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC))) |
183 | return NULL; | 329 | return NULL; |
184 | 330 | ||
185 | if (!(sb->mesg=kmalloc(sync_send_mesg_maxlen, GFP_ATOMIC))) { | 331 | sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC); |
332 | if (!sb->mesg) { | ||
186 | kfree(sb); | 333 | kfree(sb); |
187 | return NULL; | 334 | return NULL; |
188 | } | 335 | } |
336 | sb->mesg->reserved = 0; /* old nr_conns i.e. must be zeo now */ | ||
337 | sb->mesg->version = SYNC_PROTO_VER; | ||
338 | sb->mesg->syncid = ipvs->master_syncid; | ||
339 | sb->mesg->size = sizeof(struct ip_vs_sync_mesg); | ||
189 | sb->mesg->nr_conns = 0; | 340 | sb->mesg->nr_conns = 0; |
190 | sb->mesg->syncid = ip_vs_master_syncid; | 341 | sb->mesg->spare = 0; |
191 | sb->mesg->size = 4; | 342 | sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg); |
192 | sb->head = (unsigned char *)sb->mesg + 4; | 343 | sb->end = (unsigned char *)sb->mesg + ipvs->send_mesg_maxlen; |
193 | sb->end = (unsigned char *)sb->mesg + sync_send_mesg_maxlen; | 344 | |
194 | sb->firstuse = jiffies; | 345 | sb->firstuse = jiffies; |
195 | return sb; | 346 | return sb; |
196 | } | 347 | } |
@@ -201,14 +352,16 @@ static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb) | |||
201 | kfree(sb); | 352 | kfree(sb); |
202 | } | 353 | } |
203 | 354 | ||
204 | static inline void sb_queue_tail(struct ip_vs_sync_buff *sb) | 355 | static inline void sb_queue_tail(struct netns_ipvs *ipvs) |
205 | { | 356 | { |
206 | spin_lock(&ip_vs_sync_lock); | 357 | struct ip_vs_sync_buff *sb = ipvs->sync_buff; |
207 | if (ip_vs_sync_state & IP_VS_STATE_MASTER) | 358 | |
208 | list_add_tail(&sb->list, &ip_vs_sync_queue); | 359 | spin_lock(&ipvs->sync_lock); |
360 | if (ipvs->sync_state & IP_VS_STATE_MASTER) | ||
361 | list_add_tail(&sb->list, &ipvs->sync_queue); | ||
209 | else | 362 | else |
210 | ip_vs_sync_buff_release(sb); | 363 | ip_vs_sync_buff_release(sb); |
211 | spin_unlock(&ip_vs_sync_lock); | 364 | spin_unlock(&ipvs->sync_lock); |
212 | } | 365 | } |
213 | 366 | ||
214 | /* | 367 | /* |
@@ -216,36 +369,101 @@ static inline void sb_queue_tail(struct ip_vs_sync_buff *sb) | |||
216 | * than the specified time or the specified time is zero. | 369 | * than the specified time or the specified time is zero. |
217 | */ | 370 | */ |
218 | static inline struct ip_vs_sync_buff * | 371 | static inline struct ip_vs_sync_buff * |
219 | get_curr_sync_buff(unsigned long time) | 372 | get_curr_sync_buff(struct netns_ipvs *ipvs, unsigned long time) |
220 | { | 373 | { |
221 | struct ip_vs_sync_buff *sb; | 374 | struct ip_vs_sync_buff *sb; |
222 | 375 | ||
223 | spin_lock_bh(&curr_sb_lock); | 376 | spin_lock_bh(&ipvs->sync_buff_lock); |
224 | if (curr_sb && (time == 0 || | 377 | if (ipvs->sync_buff && (time == 0 || |
225 | time_before(jiffies - curr_sb->firstuse, time))) { | 378 | time_before(jiffies - ipvs->sync_buff->firstuse, time))) { |
226 | sb = curr_sb; | 379 | sb = ipvs->sync_buff; |
227 | curr_sb = NULL; | 380 | ipvs->sync_buff = NULL; |
228 | } else | 381 | } else |
229 | sb = NULL; | 382 | sb = NULL; |
230 | spin_unlock_bh(&curr_sb_lock); | 383 | spin_unlock_bh(&ipvs->sync_buff_lock); |
231 | return sb; | 384 | return sb; |
232 | } | 385 | } |
233 | 386 | ||
387 | /* | ||
388 | * Switch mode from sending version 0 or 1 | ||
389 | * - must handle sync_buf | ||
390 | */ | ||
391 | void ip_vs_sync_switch_mode(struct net *net, int mode) | ||
392 | { | ||
393 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
394 | |||
395 | if (!ipvs->sync_state & IP_VS_STATE_MASTER) | ||
396 | return; | ||
397 | if (mode == ipvs->sysctl_sync_ver || !ipvs->sync_buff) | ||
398 | return; | ||
399 | |||
400 | spin_lock_bh(&ipvs->sync_buff_lock); | ||
401 | /* Buffer empty ? then let buf_create do the job */ | ||
402 | if (ipvs->sync_buff->mesg->size <= sizeof(struct ip_vs_sync_mesg)) { | ||
403 | kfree(ipvs->sync_buff); | ||
404 | ipvs->sync_buff = NULL; | ||
405 | } else { | ||
406 | spin_lock_bh(&ipvs->sync_lock); | ||
407 | if (ipvs->sync_state & IP_VS_STATE_MASTER) | ||
408 | list_add_tail(&ipvs->sync_buff->list, | ||
409 | &ipvs->sync_queue); | ||
410 | else | ||
411 | ip_vs_sync_buff_release(ipvs->sync_buff); | ||
412 | spin_unlock_bh(&ipvs->sync_lock); | ||
413 | } | ||
414 | spin_unlock_bh(&ipvs->sync_buff_lock); | ||
415 | } | ||
234 | 416 | ||
235 | /* | 417 | /* |
418 | * Create a new sync buffer for Version 0 proto. | ||
419 | */ | ||
420 | static inline struct ip_vs_sync_buff * | ||
421 | ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs) | ||
422 | { | ||
423 | struct ip_vs_sync_buff *sb; | ||
424 | struct ip_vs_sync_mesg_v0 *mesg; | ||
425 | |||
426 | if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC))) | ||
427 | return NULL; | ||
428 | |||
429 | sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC); | ||
430 | if (!sb->mesg) { | ||
431 | kfree(sb); | ||
432 | return NULL; | ||
433 | } | ||
434 | mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg; | ||
435 | mesg->nr_conns = 0; | ||
436 | mesg->syncid = ipvs->master_syncid; | ||
437 | mesg->size = sizeof(struct ip_vs_sync_mesg_v0); | ||
438 | sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0); | ||
439 | sb->end = (unsigned char *)mesg + ipvs->send_mesg_maxlen; | ||
440 | sb->firstuse = jiffies; | ||
441 | return sb; | ||
442 | } | ||
443 | |||
444 | /* | ||
445 | * Version 0 , could be switched in by sys_ctl. | ||
236 | * Add an ip_vs_conn information into the current sync_buff. | 446 | * Add an ip_vs_conn information into the current sync_buff. |
237 | * Called by ip_vs_in. | ||
238 | */ | 447 | */ |
239 | void ip_vs_sync_conn(struct ip_vs_conn *cp) | 448 | void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp) |
240 | { | 449 | { |
241 | struct ip_vs_sync_mesg *m; | 450 | struct netns_ipvs *ipvs = net_ipvs(net); |
242 | struct ip_vs_sync_conn *s; | 451 | struct ip_vs_sync_mesg_v0 *m; |
452 | struct ip_vs_sync_conn_v0 *s; | ||
243 | int len; | 453 | int len; |
244 | 454 | ||
245 | spin_lock(&curr_sb_lock); | 455 | if (unlikely(cp->af != AF_INET)) |
246 | if (!curr_sb) { | 456 | return; |
247 | if (!(curr_sb=ip_vs_sync_buff_create())) { | 457 | /* Do not sync ONE PACKET */ |
248 | spin_unlock(&curr_sb_lock); | 458 | if (cp->flags & IP_VS_CONN_F_ONE_PACKET) |
459 | return; | ||
460 | |||
461 | spin_lock(&ipvs->sync_buff_lock); | ||
462 | if (!ipvs->sync_buff) { | ||
463 | ipvs->sync_buff = | ||
464 | ip_vs_sync_buff_create_v0(ipvs); | ||
465 | if (!ipvs->sync_buff) { | ||
466 | spin_unlock(&ipvs->sync_buff_lock); | ||
249 | pr_err("ip_vs_sync_buff_create failed.\n"); | 467 | pr_err("ip_vs_sync_buff_create failed.\n"); |
250 | return; | 468 | return; |
251 | } | 469 | } |
@@ -253,10 +471,11 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp) | |||
253 | 471 | ||
254 | len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE : | 472 | len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE : |
255 | SIMPLE_CONN_SIZE; | 473 | SIMPLE_CONN_SIZE; |
256 | m = curr_sb->mesg; | 474 | m = (struct ip_vs_sync_mesg_v0 *)ipvs->sync_buff->mesg; |
257 | s = (struct ip_vs_sync_conn *)curr_sb->head; | 475 | s = (struct ip_vs_sync_conn_v0 *)ipvs->sync_buff->head; |
258 | 476 | ||
259 | /* copy members */ | 477 | /* copy members */ |
478 | s->reserved = 0; | ||
260 | s->protocol = cp->protocol; | 479 | s->protocol = cp->protocol; |
261 | s->cport = cp->cport; | 480 | s->cport = cp->cport; |
262 | s->vport = cp->vport; | 481 | s->vport = cp->vport; |
@@ -274,83 +493,366 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp) | |||
274 | 493 | ||
275 | m->nr_conns++; | 494 | m->nr_conns++; |
276 | m->size += len; | 495 | m->size += len; |
277 | curr_sb->head += len; | 496 | ipvs->sync_buff->head += len; |
278 | 497 | ||
279 | /* check if there is a space for next one */ | 498 | /* check if there is a space for next one */ |
280 | if (curr_sb->head+FULL_CONN_SIZE > curr_sb->end) { | 499 | if (ipvs->sync_buff->head + FULL_CONN_SIZE > ipvs->sync_buff->end) { |
281 | sb_queue_tail(curr_sb); | 500 | sb_queue_tail(ipvs); |
282 | curr_sb = NULL; | 501 | ipvs->sync_buff = NULL; |
283 | } | 502 | } |
284 | spin_unlock(&curr_sb_lock); | 503 | spin_unlock(&ipvs->sync_buff_lock); |
285 | 504 | ||
286 | /* synchronize its controller if it has */ | 505 | /* synchronize its controller if it has */ |
287 | if (cp->control) | 506 | if (cp->control) |
288 | ip_vs_sync_conn(cp->control); | 507 | ip_vs_sync_conn(net, cp->control); |
508 | } | ||
509 | |||
510 | /* | ||
511 | * Add an ip_vs_conn information into the current sync_buff. | ||
512 | * Called by ip_vs_in. | ||
513 | * Sending Version 1 messages | ||
514 | */ | ||
515 | void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp) | ||
516 | { | ||
517 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
518 | struct ip_vs_sync_mesg *m; | ||
519 | union ip_vs_sync_conn *s; | ||
520 | __u8 *p; | ||
521 | unsigned int len, pe_name_len, pad; | ||
522 | |||
523 | /* Handle old version of the protocol */ | ||
524 | if (ipvs->sysctl_sync_ver == 0) { | ||
525 | ip_vs_sync_conn_v0(net, cp); | ||
526 | return; | ||
527 | } | ||
528 | /* Do not sync ONE PACKET */ | ||
529 | if (cp->flags & IP_VS_CONN_F_ONE_PACKET) | ||
530 | goto control; | ||
531 | sloop: | ||
532 | /* Sanity checks */ | ||
533 | pe_name_len = 0; | ||
534 | if (cp->pe_data_len) { | ||
535 | if (!cp->pe_data || !cp->dest) { | ||
536 | IP_VS_ERR_RL("SYNC, connection pe_data invalid\n"); | ||
537 | return; | ||
538 | } | ||
539 | pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN); | ||
540 | } | ||
541 | |||
542 | spin_lock(&ipvs->sync_buff_lock); | ||
543 | |||
544 | #ifdef CONFIG_IP_VS_IPV6 | ||
545 | if (cp->af == AF_INET6) | ||
546 | len = sizeof(struct ip_vs_sync_v6); | ||
547 | else | ||
548 | #endif | ||
549 | len = sizeof(struct ip_vs_sync_v4); | ||
550 | |||
551 | if (cp->flags & IP_VS_CONN_F_SEQ_MASK) | ||
552 | len += sizeof(struct ip_vs_sync_conn_options) + 2; | ||
553 | |||
554 | if (cp->pe_data_len) | ||
555 | len += cp->pe_data_len + 2; /* + Param hdr field */ | ||
556 | if (pe_name_len) | ||
557 | len += pe_name_len + 2; | ||
558 | |||
559 | /* check if there is a space for this one */ | ||
560 | pad = 0; | ||
561 | if (ipvs->sync_buff) { | ||
562 | pad = (4 - (size_t)ipvs->sync_buff->head) & 3; | ||
563 | if (ipvs->sync_buff->head + len + pad > ipvs->sync_buff->end) { | ||
564 | sb_queue_tail(ipvs); | ||
565 | ipvs->sync_buff = NULL; | ||
566 | pad = 0; | ||
567 | } | ||
568 | } | ||
569 | |||
570 | if (!ipvs->sync_buff) { | ||
571 | ipvs->sync_buff = ip_vs_sync_buff_create(ipvs); | ||
572 | if (!ipvs->sync_buff) { | ||
573 | spin_unlock(&ipvs->sync_buff_lock); | ||
574 | pr_err("ip_vs_sync_buff_create failed.\n"); | ||
575 | return; | ||
576 | } | ||
577 | } | ||
578 | |||
579 | m = ipvs->sync_buff->mesg; | ||
580 | p = ipvs->sync_buff->head; | ||
581 | ipvs->sync_buff->head += pad + len; | ||
582 | m->size += pad + len; | ||
583 | /* Add ev. padding from prev. sync_conn */ | ||
584 | while (pad--) | ||
585 | *(p++) = 0; | ||
586 | |||
587 | s = (union ip_vs_sync_conn *)p; | ||
588 | |||
589 | /* Set message type & copy members */ | ||
590 | s->v4.type = (cp->af == AF_INET6 ? STYPE_F_INET6 : 0); | ||
591 | s->v4.ver_size = htons(len & SVER_MASK); /* Version 0 */ | ||
592 | s->v4.flags = htonl(cp->flags & ~IP_VS_CONN_F_HASHED); | ||
593 | s->v4.state = htons(cp->state); | ||
594 | s->v4.protocol = cp->protocol; | ||
595 | s->v4.cport = cp->cport; | ||
596 | s->v4.vport = cp->vport; | ||
597 | s->v4.dport = cp->dport; | ||
598 | s->v4.fwmark = htonl(cp->fwmark); | ||
599 | s->v4.timeout = htonl(cp->timeout / HZ); | ||
600 | m->nr_conns++; | ||
601 | |||
602 | #ifdef CONFIG_IP_VS_IPV6 | ||
603 | if (cp->af == AF_INET6) { | ||
604 | p += sizeof(struct ip_vs_sync_v6); | ||
605 | ipv6_addr_copy(&s->v6.caddr, &cp->caddr.in6); | ||
606 | ipv6_addr_copy(&s->v6.vaddr, &cp->vaddr.in6); | ||
607 | ipv6_addr_copy(&s->v6.daddr, &cp->daddr.in6); | ||
608 | } else | ||
609 | #endif | ||
610 | { | ||
611 | p += sizeof(struct ip_vs_sync_v4); /* options ptr */ | ||
612 | s->v4.caddr = cp->caddr.ip; | ||
613 | s->v4.vaddr = cp->vaddr.ip; | ||
614 | s->v4.daddr = cp->daddr.ip; | ||
615 | } | ||
616 | if (cp->flags & IP_VS_CONN_F_SEQ_MASK) { | ||
617 | *(p++) = IPVS_OPT_SEQ_DATA; | ||
618 | *(p++) = sizeof(struct ip_vs_sync_conn_options); | ||
619 | hton_seq((struct ip_vs_seq *)p, &cp->in_seq); | ||
620 | p += sizeof(struct ip_vs_seq); | ||
621 | hton_seq((struct ip_vs_seq *)p, &cp->out_seq); | ||
622 | p += sizeof(struct ip_vs_seq); | ||
623 | } | ||
624 | /* Handle pe data */ | ||
625 | if (cp->pe_data_len && cp->pe_data) { | ||
626 | *(p++) = IPVS_OPT_PE_DATA; | ||
627 | *(p++) = cp->pe_data_len; | ||
628 | memcpy(p, cp->pe_data, cp->pe_data_len); | ||
629 | p += cp->pe_data_len; | ||
630 | if (pe_name_len) { | ||
631 | /* Add PE_NAME */ | ||
632 | *(p++) = IPVS_OPT_PE_NAME; | ||
633 | *(p++) = pe_name_len; | ||
634 | memcpy(p, cp->pe->name, pe_name_len); | ||
635 | p += pe_name_len; | ||
636 | } | ||
637 | } | ||
638 | |||
639 | spin_unlock(&ipvs->sync_buff_lock); | ||
640 | |||
641 | control: | ||
642 | /* synchronize its controller if it has */ | ||
643 | cp = cp->control; | ||
644 | if (!cp) | ||
645 | return; | ||
646 | /* | ||
647 | * Reduce sync rate for templates | ||
648 | * i.e only increment in_pkts for Templates. | ||
649 | */ | ||
650 | if (cp->flags & IP_VS_CONN_F_TEMPLATE) { | ||
651 | int pkts = atomic_add_return(1, &cp->in_pkts); | ||
652 | |||
653 | if (pkts % ipvs->sysctl_sync_threshold[1] != 1) | ||
654 | return; | ||
655 | } | ||
656 | goto sloop; | ||
289 | } | 657 | } |
290 | 658 | ||
659 | /* | ||
660 | * fill_param used by version 1 | ||
661 | */ | ||
291 | static inline int | 662 | static inline int |
292 | ip_vs_conn_fill_param_sync(int af, int protocol, | 663 | ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc, |
293 | const union nf_inet_addr *caddr, __be16 cport, | 664 | struct ip_vs_conn_param *p, |
294 | const union nf_inet_addr *vaddr, __be16 vport, | 665 | __u8 *pe_data, unsigned int pe_data_len, |
295 | struct ip_vs_conn_param *p) | 666 | __u8 *pe_name, unsigned int pe_name_len) |
296 | { | 667 | { |
297 | /* XXX: Need to take into account persistence engine */ | 668 | #ifdef CONFIG_IP_VS_IPV6 |
298 | ip_vs_conn_fill_param(af, protocol, caddr, cport, vaddr, vport, p); | 669 | if (af == AF_INET6) |
670 | ip_vs_conn_fill_param(net, af, sc->v6.protocol, | ||
671 | (const union nf_inet_addr *)&sc->v6.caddr, | ||
672 | sc->v6.cport, | ||
673 | (const union nf_inet_addr *)&sc->v6.vaddr, | ||
674 | sc->v6.vport, p); | ||
675 | else | ||
676 | #endif | ||
677 | ip_vs_conn_fill_param(net, af, sc->v4.protocol, | ||
678 | (const union nf_inet_addr *)&sc->v4.caddr, | ||
679 | sc->v4.cport, | ||
680 | (const union nf_inet_addr *)&sc->v4.vaddr, | ||
681 | sc->v4.vport, p); | ||
682 | /* Handle pe data */ | ||
683 | if (pe_data_len) { | ||
684 | if (pe_name_len) { | ||
685 | char buff[IP_VS_PENAME_MAXLEN+1]; | ||
686 | |||
687 | memcpy(buff, pe_name, pe_name_len); | ||
688 | buff[pe_name_len]=0; | ||
689 | p->pe = __ip_vs_pe_getbyname(buff); | ||
690 | if (!p->pe) { | ||
691 | IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n", | ||
692 | buff); | ||
693 | return 1; | ||
694 | } | ||
695 | } else { | ||
696 | IP_VS_ERR_RL("BACKUP, Invalid PE parameters\n"); | ||
697 | return 1; | ||
698 | } | ||
699 | |||
700 | p->pe_data = kmalloc(pe_data_len, GFP_ATOMIC); | ||
701 | if (!p->pe_data) { | ||
702 | if (p->pe->module) | ||
703 | module_put(p->pe->module); | ||
704 | return -ENOMEM; | ||
705 | } | ||
706 | memcpy(p->pe_data, pe_data, pe_data_len); | ||
707 | p->pe_data_len = pe_data_len; | ||
708 | } | ||
299 | return 0; | 709 | return 0; |
300 | } | 710 | } |
301 | 711 | ||
302 | /* | 712 | /* |
303 | * Process received multicast message and create the corresponding | 713 | * Connection Add / Update. |
304 | * ip_vs_conn entries. | 714 | * Common for version 0 and 1 reception of backup sync_conns. |
715 | * Param: ... | ||
716 | * timeout is in sec. | ||
305 | */ | 717 | */ |
306 | static void ip_vs_process_message(const char *buffer, const size_t buflen) | 718 | static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, |
719 | unsigned int flags, unsigned int state, | ||
720 | unsigned int protocol, unsigned int type, | ||
721 | const union nf_inet_addr *daddr, __be16 dport, | ||
722 | unsigned long timeout, __u32 fwmark, | ||
723 | struct ip_vs_sync_conn_options *opt) | ||
307 | { | 724 | { |
308 | struct ip_vs_sync_mesg *m = (struct ip_vs_sync_mesg *)buffer; | ||
309 | struct ip_vs_sync_conn *s; | ||
310 | struct ip_vs_sync_conn_options *opt; | ||
311 | struct ip_vs_conn *cp; | ||
312 | struct ip_vs_protocol *pp; | ||
313 | struct ip_vs_dest *dest; | 725 | struct ip_vs_dest *dest; |
314 | struct ip_vs_conn_param param; | 726 | struct ip_vs_conn *cp; |
315 | char *p; | 727 | struct netns_ipvs *ipvs = net_ipvs(net); |
316 | int i; | ||
317 | 728 | ||
318 | if (buflen < sizeof(struct ip_vs_sync_mesg)) { | 729 | if (!(flags & IP_VS_CONN_F_TEMPLATE)) |
319 | IP_VS_ERR_RL("sync message header too short\n"); | 730 | cp = ip_vs_conn_in_get(param); |
320 | return; | 731 | else |
321 | } | 732 | cp = ip_vs_ct_in_get(param); |
322 | 733 | ||
323 | /* Convert size back to host byte order */ | 734 | if (cp && param->pe_data) /* Free pe_data */ |
324 | m->size = ntohs(m->size); | 735 | kfree(param->pe_data); |
736 | if (!cp) { | ||
737 | /* | ||
738 | * Find the appropriate destination for the connection. | ||
739 | * If it is not found the connection will remain unbound | ||
740 | * but still handled. | ||
741 | */ | ||
742 | dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr, | ||
743 | param->vport, protocol, fwmark); | ||
325 | 744 | ||
326 | if (buflen != m->size) { | 745 | /* Set the approprite ativity flag */ |
327 | IP_VS_ERR_RL("bogus sync message size\n"); | 746 | if (protocol == IPPROTO_TCP) { |
328 | return; | 747 | if (state != IP_VS_TCP_S_ESTABLISHED) |
748 | flags |= IP_VS_CONN_F_INACTIVE; | ||
749 | else | ||
750 | flags &= ~IP_VS_CONN_F_INACTIVE; | ||
751 | } else if (protocol == IPPROTO_SCTP) { | ||
752 | if (state != IP_VS_SCTP_S_ESTABLISHED) | ||
753 | flags |= IP_VS_CONN_F_INACTIVE; | ||
754 | else | ||
755 | flags &= ~IP_VS_CONN_F_INACTIVE; | ||
756 | } | ||
757 | cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark); | ||
758 | if (dest) | ||
759 | atomic_dec(&dest->refcnt); | ||
760 | if (!cp) { | ||
761 | if (param->pe_data) | ||
762 | kfree(param->pe_data); | ||
763 | IP_VS_DBG(2, "BACKUP, add new conn. failed\n"); | ||
764 | return; | ||
765 | } | ||
766 | } else if (!cp->dest) { | ||
767 | dest = ip_vs_try_bind_dest(cp); | ||
768 | if (dest) | ||
769 | atomic_dec(&dest->refcnt); | ||
770 | } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) && | ||
771 | (cp->state != state)) { | ||
772 | /* update active/inactive flag for the connection */ | ||
773 | dest = cp->dest; | ||
774 | if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && | ||
775 | (state != IP_VS_TCP_S_ESTABLISHED)) { | ||
776 | atomic_dec(&dest->activeconns); | ||
777 | atomic_inc(&dest->inactconns); | ||
778 | cp->flags |= IP_VS_CONN_F_INACTIVE; | ||
779 | } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) && | ||
780 | (state == IP_VS_TCP_S_ESTABLISHED)) { | ||
781 | atomic_inc(&dest->activeconns); | ||
782 | atomic_dec(&dest->inactconns); | ||
783 | cp->flags &= ~IP_VS_CONN_F_INACTIVE; | ||
784 | } | ||
785 | } else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) && | ||
786 | (cp->state != state)) { | ||
787 | dest = cp->dest; | ||
788 | if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && | ||
789 | (state != IP_VS_SCTP_S_ESTABLISHED)) { | ||
790 | atomic_dec(&dest->activeconns); | ||
791 | atomic_inc(&dest->inactconns); | ||
792 | cp->flags &= ~IP_VS_CONN_F_INACTIVE; | ||
793 | } | ||
329 | } | 794 | } |
330 | 795 | ||
331 | /* SyncID sanity check */ | 796 | if (opt) |
332 | if (ip_vs_backup_syncid != 0 && m->syncid != ip_vs_backup_syncid) { | 797 | memcpy(&cp->in_seq, opt, sizeof(*opt)); |
333 | IP_VS_DBG(7, "Ignoring incoming msg with syncid = %d\n", | 798 | atomic_set(&cp->in_pkts, ipvs->sysctl_sync_threshold[0]); |
334 | m->syncid); | 799 | cp->state = state; |
335 | return; | 800 | cp->old_state = cp->state; |
801 | /* | ||
802 | * For Ver 0 messages style | ||
803 | * - Not possible to recover the right timeout for templates | ||
804 | * - can not find the right fwmark | ||
805 | * virtual service. If needed, we can do it for | ||
806 | * non-fwmark persistent services. | ||
807 | * Ver 1 messages style. | ||
808 | * - No problem. | ||
809 | */ | ||
810 | if (timeout) { | ||
811 | if (timeout > MAX_SCHEDULE_TIMEOUT / HZ) | ||
812 | timeout = MAX_SCHEDULE_TIMEOUT / HZ; | ||
813 | cp->timeout = timeout*HZ; | ||
814 | } else { | ||
815 | struct ip_vs_proto_data *pd; | ||
816 | |||
817 | pd = ip_vs_proto_data_get(net, protocol); | ||
818 | if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table) | ||
819 | cp->timeout = pd->timeout_table[state]; | ||
820 | else | ||
821 | cp->timeout = (3*60*HZ); | ||
336 | } | 822 | } |
823 | ip_vs_conn_put(cp); | ||
824 | } | ||
337 | 825 | ||
338 | p = (char *)buffer + sizeof(struct ip_vs_sync_mesg); | 826 | /* |
827 | * Process received multicast message for Version 0 | ||
828 | */ | ||
829 | static void ip_vs_process_message_v0(struct net *net, const char *buffer, | ||
830 | const size_t buflen) | ||
831 | { | ||
832 | struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer; | ||
833 | struct ip_vs_sync_conn_v0 *s; | ||
834 | struct ip_vs_sync_conn_options *opt; | ||
835 | struct ip_vs_protocol *pp; | ||
836 | struct ip_vs_conn_param param; | ||
837 | char *p; | ||
838 | int i; | ||
839 | |||
840 | p = (char *)buffer + sizeof(struct ip_vs_sync_mesg_v0); | ||
339 | for (i=0; i<m->nr_conns; i++) { | 841 | for (i=0; i<m->nr_conns; i++) { |
340 | unsigned flags, state; | 842 | unsigned flags, state; |
341 | 843 | ||
342 | if (p + SIMPLE_CONN_SIZE > buffer+buflen) { | 844 | if (p + SIMPLE_CONN_SIZE > buffer+buflen) { |
343 | IP_VS_ERR_RL("bogus conn in sync message\n"); | 845 | IP_VS_ERR_RL("BACKUP v0, bogus conn\n"); |
344 | return; | 846 | return; |
345 | } | 847 | } |
346 | s = (struct ip_vs_sync_conn *) p; | 848 | s = (struct ip_vs_sync_conn_v0 *) p; |
347 | flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC; | 849 | flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC; |
348 | flags &= ~IP_VS_CONN_F_HASHED; | 850 | flags &= ~IP_VS_CONN_F_HASHED; |
349 | if (flags & IP_VS_CONN_F_SEQ_MASK) { | 851 | if (flags & IP_VS_CONN_F_SEQ_MASK) { |
350 | opt = (struct ip_vs_sync_conn_options *)&s[1]; | 852 | opt = (struct ip_vs_sync_conn_options *)&s[1]; |
351 | p += FULL_CONN_SIZE; | 853 | p += FULL_CONN_SIZE; |
352 | if (p > buffer+buflen) { | 854 | if (p > buffer+buflen) { |
353 | IP_VS_ERR_RL("bogus conn options in sync message\n"); | 855 | IP_VS_ERR_RL("BACKUP v0, Dropping buffer bogus conn options\n"); |
354 | return; | 856 | return; |
355 | } | 857 | } |
356 | } else { | 858 | } else { |
@@ -362,118 +864,286 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) | |||
362 | if (!(flags & IP_VS_CONN_F_TEMPLATE)) { | 864 | if (!(flags & IP_VS_CONN_F_TEMPLATE)) { |
363 | pp = ip_vs_proto_get(s->protocol); | 865 | pp = ip_vs_proto_get(s->protocol); |
364 | if (!pp) { | 866 | if (!pp) { |
365 | IP_VS_ERR_RL("Unsupported protocol %u in sync msg\n", | 867 | IP_VS_DBG(2, "BACKUP v0, Unsupported protocol %u\n", |
366 | s->protocol); | 868 | s->protocol); |
367 | continue; | 869 | continue; |
368 | } | 870 | } |
369 | if (state >= pp->num_states) { | 871 | if (state >= pp->num_states) { |
370 | IP_VS_DBG(2, "Invalid %s state %u in sync msg\n", | 872 | IP_VS_DBG(2, "BACKUP v0, Invalid %s state %u\n", |
371 | pp->name, state); | 873 | pp->name, state); |
372 | continue; | 874 | continue; |
373 | } | 875 | } |
374 | } else { | 876 | } else { |
375 | /* protocol in templates is not used for state/timeout */ | 877 | /* protocol in templates is not used for state/timeout */ |
376 | pp = NULL; | ||
377 | if (state > 0) { | 878 | if (state > 0) { |
378 | IP_VS_DBG(2, "Invalid template state %u in sync msg\n", | 879 | IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n", |
379 | state); | 880 | state); |
380 | state = 0; | 881 | state = 0; |
381 | } | 882 | } |
382 | } | 883 | } |
383 | 884 | ||
384 | { | 885 | ip_vs_conn_fill_param(net, AF_INET, s->protocol, |
385 | if (ip_vs_conn_fill_param_sync(AF_INET, s->protocol, | 886 | (const union nf_inet_addr *)&s->caddr, |
386 | (union nf_inet_addr *)&s->caddr, | 887 | s->cport, |
387 | s->cport, | 888 | (const union nf_inet_addr *)&s->vaddr, |
388 | (union nf_inet_addr *)&s->vaddr, | 889 | s->vport, ¶m); |
389 | s->vport, ¶m)) { | 890 | |
390 | pr_err("ip_vs_conn_fill_param_sync failed"); | 891 | /* Send timeout as Zero */ |
391 | return; | 892 | ip_vs_proc_conn(net, ¶m, flags, state, s->protocol, AF_INET, |
893 | (union nf_inet_addr *)&s->daddr, s->dport, | ||
894 | 0, 0, opt); | ||
895 | } | ||
896 | } | ||
897 | |||
898 | /* | ||
899 | * Handle options | ||
900 | */ | ||
901 | static inline int ip_vs_proc_seqopt(__u8 *p, unsigned int plen, | ||
902 | __u32 *opt_flags, | ||
903 | struct ip_vs_sync_conn_options *opt) | ||
904 | { | ||
905 | struct ip_vs_sync_conn_options *topt; | ||
906 | |||
907 | topt = (struct ip_vs_sync_conn_options *)p; | ||
908 | |||
909 | if (plen != sizeof(struct ip_vs_sync_conn_options)) { | ||
910 | IP_VS_DBG(2, "BACKUP, bogus conn options length\n"); | ||
911 | return -EINVAL; | ||
912 | } | ||
913 | if (*opt_flags & IPVS_OPT_F_SEQ_DATA) { | ||
914 | IP_VS_DBG(2, "BACKUP, conn options found twice\n"); | ||
915 | return -EINVAL; | ||
916 | } | ||
917 | ntoh_seq(&topt->in_seq, &opt->in_seq); | ||
918 | ntoh_seq(&topt->out_seq, &opt->out_seq); | ||
919 | *opt_flags |= IPVS_OPT_F_SEQ_DATA; | ||
920 | return 0; | ||
921 | } | ||
922 | |||
923 | static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len, | ||
924 | __u8 **data, unsigned int maxlen, | ||
925 | __u32 *opt_flags, __u32 flag) | ||
926 | { | ||
927 | if (plen > maxlen) { | ||
928 | IP_VS_DBG(2, "BACKUP, bogus par.data len > %d\n", maxlen); | ||
929 | return -EINVAL; | ||
930 | } | ||
931 | if (*opt_flags & flag) { | ||
932 | IP_VS_DBG(2, "BACKUP, Par.data found twice 0x%x\n", flag); | ||
933 | return -EINVAL; | ||
934 | } | ||
935 | *data_len = plen; | ||
936 | *data = p; | ||
937 | *opt_flags |= flag; | ||
938 | return 0; | ||
939 | } | ||
940 | /* | ||
941 | * Process a Version 1 sync. connection | ||
942 | */ | ||
943 | static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end) | ||
944 | { | ||
945 | struct ip_vs_sync_conn_options opt; | ||
946 | union ip_vs_sync_conn *s; | ||
947 | struct ip_vs_protocol *pp; | ||
948 | struct ip_vs_conn_param param; | ||
949 | __u32 flags; | ||
950 | unsigned int af, state, pe_data_len=0, pe_name_len=0; | ||
951 | __u8 *pe_data=NULL, *pe_name=NULL; | ||
952 | __u32 opt_flags=0; | ||
953 | int retc=0; | ||
954 | |||
955 | s = (union ip_vs_sync_conn *) p; | ||
956 | |||
957 | if (s->v6.type & STYPE_F_INET6) { | ||
958 | #ifdef CONFIG_IP_VS_IPV6 | ||
959 | af = AF_INET6; | ||
960 | p += sizeof(struct ip_vs_sync_v6); | ||
961 | #else | ||
962 | IP_VS_DBG(3,"BACKUP, IPv6 msg received, and IPVS is not compiled for IPv6\n"); | ||
963 | retc = 10; | ||
964 | goto out; | ||
965 | #endif | ||
966 | } else if (!s->v4.type) { | ||
967 | af = AF_INET; | ||
968 | p += sizeof(struct ip_vs_sync_v4); | ||
969 | } else { | ||
970 | return -10; | ||
971 | } | ||
972 | if (p > msg_end) | ||
973 | return -20; | ||
974 | |||
975 | /* Process optional params check Type & Len. */ | ||
976 | while (p < msg_end) { | ||
977 | int ptype; | ||
978 | int plen; | ||
979 | |||
980 | if (p+2 > msg_end) | ||
981 | return -30; | ||
982 | ptype = *(p++); | ||
983 | plen = *(p++); | ||
984 | |||
985 | if (!plen || ((p + plen) > msg_end)) | ||
986 | return -40; | ||
987 | /* Handle seq option p = param data */ | ||
988 | switch (ptype & ~IPVS_OPT_F_PARAM) { | ||
989 | case IPVS_OPT_SEQ_DATA: | ||
990 | if (ip_vs_proc_seqopt(p, plen, &opt_flags, &opt)) | ||
991 | return -50; | ||
992 | break; | ||
993 | |||
994 | case IPVS_OPT_PE_DATA: | ||
995 | if (ip_vs_proc_str(p, plen, &pe_data_len, &pe_data, | ||
996 | IP_VS_PEDATA_MAXLEN, &opt_flags, | ||
997 | IPVS_OPT_F_PE_DATA)) | ||
998 | return -60; | ||
999 | break; | ||
1000 | |||
1001 | case IPVS_OPT_PE_NAME: | ||
1002 | if (ip_vs_proc_str(p, plen,&pe_name_len, &pe_name, | ||
1003 | IP_VS_PENAME_MAXLEN, &opt_flags, | ||
1004 | IPVS_OPT_F_PE_NAME)) | ||
1005 | return -70; | ||
1006 | break; | ||
1007 | |||
1008 | default: | ||
1009 | /* Param data mandatory ? */ | ||
1010 | if (!(ptype & IPVS_OPT_F_PARAM)) { | ||
1011 | IP_VS_DBG(3, "BACKUP, Unknown mandatory param %d found\n", | ||
1012 | ptype & ~IPVS_OPT_F_PARAM); | ||
1013 | retc = 20; | ||
1014 | goto out; | ||
392 | } | 1015 | } |
393 | if (!(flags & IP_VS_CONN_F_TEMPLATE)) | ||
394 | cp = ip_vs_conn_in_get(¶m); | ||
395 | else | ||
396 | cp = ip_vs_ct_in_get(¶m); | ||
397 | } | 1016 | } |
398 | if (!cp) { | 1017 | p += plen; /* Next option */ |
399 | /* | 1018 | } |
400 | * Find the appropriate destination for the connection. | 1019 | |
401 | * If it is not found the connection will remain unbound | 1020 | /* Get flags and Mask off unsupported */ |
402 | * but still handled. | 1021 | flags = ntohl(s->v4.flags) & IP_VS_CONN_F_BACKUP_MASK; |
403 | */ | 1022 | flags |= IP_VS_CONN_F_SYNC; |
404 | dest = ip_vs_find_dest(AF_INET, | 1023 | state = ntohs(s->v4.state); |
405 | (union nf_inet_addr *)&s->daddr, | 1024 | |
406 | s->dport, | 1025 | if (!(flags & IP_VS_CONN_F_TEMPLATE)) { |
407 | (union nf_inet_addr *)&s->vaddr, | 1026 | pp = ip_vs_proto_get(s->v4.protocol); |
408 | s->vport, | 1027 | if (!pp) { |
409 | s->protocol); | 1028 | IP_VS_DBG(3,"BACKUP, Unsupported protocol %u\n", |
410 | /* Set the approprite ativity flag */ | 1029 | s->v4.protocol); |
411 | if (s->protocol == IPPROTO_TCP) { | 1030 | retc = 30; |
412 | if (state != IP_VS_TCP_S_ESTABLISHED) | 1031 | goto out; |
413 | flags |= IP_VS_CONN_F_INACTIVE; | 1032 | } |
414 | else | 1033 | if (state >= pp->num_states) { |
415 | flags &= ~IP_VS_CONN_F_INACTIVE; | 1034 | IP_VS_DBG(3, "BACKUP, Invalid %s state %u\n", |
416 | } else if (s->protocol == IPPROTO_SCTP) { | 1035 | pp->name, state); |
417 | if (state != IP_VS_SCTP_S_ESTABLISHED) | 1036 | retc = 40; |
418 | flags |= IP_VS_CONN_F_INACTIVE; | 1037 | goto out; |
419 | else | 1038 | } |
420 | flags &= ~IP_VS_CONN_F_INACTIVE; | 1039 | } else { |
1040 | /* protocol in templates is not used for state/timeout */ | ||
1041 | if (state > 0) { | ||
1042 | IP_VS_DBG(3, "BACKUP, Invalid template state %u\n", | ||
1043 | state); | ||
1044 | state = 0; | ||
1045 | } | ||
1046 | } | ||
1047 | if (ip_vs_conn_fill_param_sync(net, af, s, ¶m, pe_data, | ||
1048 | pe_data_len, pe_name, pe_name_len)) { | ||
1049 | retc = 50; | ||
1050 | goto out; | ||
1051 | } | ||
1052 | /* If only IPv4, just silent skip IPv6 */ | ||
1053 | if (af == AF_INET) | ||
1054 | ip_vs_proc_conn(net, ¶m, flags, state, s->v4.protocol, af, | ||
1055 | (union nf_inet_addr *)&s->v4.daddr, s->v4.dport, | ||
1056 | ntohl(s->v4.timeout), ntohl(s->v4.fwmark), | ||
1057 | (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL) | ||
1058 | ); | ||
1059 | #ifdef CONFIG_IP_VS_IPV6 | ||
1060 | else | ||
1061 | ip_vs_proc_conn(net, ¶m, flags, state, s->v6.protocol, af, | ||
1062 | (union nf_inet_addr *)&s->v6.daddr, s->v6.dport, | ||
1063 | ntohl(s->v6.timeout), ntohl(s->v6.fwmark), | ||
1064 | (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL) | ||
1065 | ); | ||
1066 | #endif | ||
1067 | return 0; | ||
1068 | /* Error exit */ | ||
1069 | out: | ||
1070 | IP_VS_DBG(2, "BACKUP, Single msg dropped err:%d\n", retc); | ||
1071 | return retc; | ||
1072 | |||
1073 | } | ||
1074 | /* | ||
1075 | * Process received multicast message and create the corresponding | ||
1076 | * ip_vs_conn entries. | ||
1077 | * Handles Version 0 & 1 | ||
1078 | */ | ||
1079 | static void ip_vs_process_message(struct net *net, __u8 *buffer, | ||
1080 | const size_t buflen) | ||
1081 | { | ||
1082 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
1083 | struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer; | ||
1084 | __u8 *p, *msg_end; | ||
1085 | int i, nr_conns; | ||
1086 | |||
1087 | if (buflen < sizeof(struct ip_vs_sync_mesg_v0)) { | ||
1088 | IP_VS_DBG(2, "BACKUP, message header too short\n"); | ||
1089 | return; | ||
1090 | } | ||
1091 | /* Convert size back to host byte order */ | ||
1092 | m2->size = ntohs(m2->size); | ||
1093 | |||
1094 | if (buflen != m2->size) { | ||
1095 | IP_VS_DBG(2, "BACKUP, bogus message size\n"); | ||
1096 | return; | ||
1097 | } | ||
1098 | /* SyncID sanity check */ | ||
1099 | if (ipvs->backup_syncid != 0 && m2->syncid != ipvs->backup_syncid) { | ||
1100 | IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid); | ||
1101 | return; | ||
1102 | } | ||
1103 | /* Handle version 1 message */ | ||
1104 | if ((m2->version == SYNC_PROTO_VER) && (m2->reserved == 0) | ||
1105 | && (m2->spare == 0)) { | ||
1106 | |||
1107 | msg_end = buffer + sizeof(struct ip_vs_sync_mesg); | ||
1108 | nr_conns = m2->nr_conns; | ||
1109 | |||
1110 | for (i=0; i<nr_conns; i++) { | ||
1111 | union ip_vs_sync_conn *s; | ||
1112 | unsigned size; | ||
1113 | int retc; | ||
1114 | |||
1115 | p = msg_end; | ||
1116 | if (p + sizeof(s->v4) > buffer+buflen) { | ||
1117 | IP_VS_ERR_RL("BACKUP, Dropping buffer, to small\n"); | ||
1118 | return; | ||
421 | } | 1119 | } |
422 | cp = ip_vs_conn_new(¶m, | 1120 | s = (union ip_vs_sync_conn *)p; |
423 | (union nf_inet_addr *)&s->daddr, | 1121 | size = ntohs(s->v4.ver_size) & SVER_MASK; |
424 | s->dport, flags, dest); | 1122 | msg_end = p + size; |
425 | if (dest) | 1123 | /* Basic sanity checks */ |
426 | atomic_dec(&dest->refcnt); | 1124 | if (msg_end > buffer+buflen) { |
427 | if (!cp) { | 1125 | IP_VS_ERR_RL("BACKUP, Dropping buffer, msg > buffer\n"); |
428 | pr_err("ip_vs_conn_new failed\n"); | ||
429 | return; | 1126 | return; |
430 | } | 1127 | } |
431 | } else if (!cp->dest) { | 1128 | if (ntohs(s->v4.ver_size) >> SVER_SHIFT) { |
432 | dest = ip_vs_try_bind_dest(cp); | 1129 | IP_VS_ERR_RL("BACKUP, Dropping buffer, Unknown version %d\n", |
433 | if (dest) | 1130 | ntohs(s->v4.ver_size) >> SVER_SHIFT); |
434 | atomic_dec(&dest->refcnt); | 1131 | return; |
435 | } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) && | ||
436 | (cp->state != state)) { | ||
437 | /* update active/inactive flag for the connection */ | ||
438 | dest = cp->dest; | ||
439 | if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && | ||
440 | (state != IP_VS_TCP_S_ESTABLISHED)) { | ||
441 | atomic_dec(&dest->activeconns); | ||
442 | atomic_inc(&dest->inactconns); | ||
443 | cp->flags |= IP_VS_CONN_F_INACTIVE; | ||
444 | } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) && | ||
445 | (state == IP_VS_TCP_S_ESTABLISHED)) { | ||
446 | atomic_inc(&dest->activeconns); | ||
447 | atomic_dec(&dest->inactconns); | ||
448 | cp->flags &= ~IP_VS_CONN_F_INACTIVE; | ||
449 | } | 1132 | } |
450 | } else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) && | 1133 | /* Process a single sync_conn */ |
451 | (cp->state != state)) { | 1134 | retc = ip_vs_proc_sync_conn(net, p, msg_end); |
452 | dest = cp->dest; | 1135 | if (retc < 0) { |
453 | if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && | 1136 | IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n", |
454 | (state != IP_VS_SCTP_S_ESTABLISHED)) { | 1137 | retc); |
455 | atomic_dec(&dest->activeconns); | 1138 | return; |
456 | atomic_inc(&dest->inactconns); | ||
457 | cp->flags &= ~IP_VS_CONN_F_INACTIVE; | ||
458 | } | 1139 | } |
1140 | /* Make sure we have 32 bit alignment */ | ||
1141 | msg_end = p + ((size + 3) & ~3); | ||
459 | } | 1142 | } |
460 | 1143 | } else { | |
461 | if (opt) | 1144 | /* Old type of message */ |
462 | memcpy(&cp->in_seq, opt, sizeof(*opt)); | 1145 | ip_vs_process_message_v0(net, buffer, buflen); |
463 | atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]); | 1146 | return; |
464 | cp->state = state; | ||
465 | cp->old_state = cp->state; | ||
466 | /* | ||
467 | * We can not recover the right timeout for templates | ||
468 | * in all cases, we can not find the right fwmark | ||
469 | * virtual service. If needed, we can do it for | ||
470 | * non-fwmark persistent services. | ||
471 | */ | ||
472 | if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table) | ||
473 | cp->timeout = pp->timeout_table[state]; | ||
474 | else | ||
475 | cp->timeout = (3*60*HZ); | ||
476 | ip_vs_conn_put(cp); | ||
477 | } | 1147 | } |
478 | } | 1148 | } |
479 | 1149 | ||
@@ -511,8 +1181,10 @@ static int set_mcast_if(struct sock *sk, char *ifname) | |||
511 | { | 1181 | { |
512 | struct net_device *dev; | 1182 | struct net_device *dev; |
513 | struct inet_sock *inet = inet_sk(sk); | 1183 | struct inet_sock *inet = inet_sk(sk); |
1184 | struct net *net = sock_net(sk); | ||
514 | 1185 | ||
515 | if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) | 1186 | dev = __dev_get_by_name(net, ifname); |
1187 | if (!dev) | ||
516 | return -ENODEV; | 1188 | return -ENODEV; |
517 | 1189 | ||
518 | if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) | 1190 | if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) |
@@ -531,30 +1203,33 @@ static int set_mcast_if(struct sock *sk, char *ifname) | |||
531 | * Set the maximum length of sync message according to the | 1203 | * Set the maximum length of sync message according to the |
532 | * specified interface's MTU. | 1204 | * specified interface's MTU. |
533 | */ | 1205 | */ |
534 | static int set_sync_mesg_maxlen(int sync_state) | 1206 | static int set_sync_mesg_maxlen(struct net *net, int sync_state) |
535 | { | 1207 | { |
1208 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
536 | struct net_device *dev; | 1209 | struct net_device *dev; |
537 | int num; | 1210 | int num; |
538 | 1211 | ||
539 | if (sync_state == IP_VS_STATE_MASTER) { | 1212 | if (sync_state == IP_VS_STATE_MASTER) { |
540 | if ((dev = __dev_get_by_name(&init_net, ip_vs_master_mcast_ifn)) == NULL) | 1213 | dev = __dev_get_by_name(net, ipvs->master_mcast_ifn); |
1214 | if (!dev) | ||
541 | return -ENODEV; | 1215 | return -ENODEV; |
542 | 1216 | ||
543 | num = (dev->mtu - sizeof(struct iphdr) - | 1217 | num = (dev->mtu - sizeof(struct iphdr) - |
544 | sizeof(struct udphdr) - | 1218 | sizeof(struct udphdr) - |
545 | SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE; | 1219 | SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE; |
546 | sync_send_mesg_maxlen = SYNC_MESG_HEADER_LEN + | 1220 | ipvs->send_mesg_maxlen = SYNC_MESG_HEADER_LEN + |
547 | SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF); | 1221 | SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF); |
548 | IP_VS_DBG(7, "setting the maximum length of sync sending " | 1222 | IP_VS_DBG(7, "setting the maximum length of sync sending " |
549 | "message %d.\n", sync_send_mesg_maxlen); | 1223 | "message %d.\n", ipvs->send_mesg_maxlen); |
550 | } else if (sync_state == IP_VS_STATE_BACKUP) { | 1224 | } else if (sync_state == IP_VS_STATE_BACKUP) { |
551 | if ((dev = __dev_get_by_name(&init_net, ip_vs_backup_mcast_ifn)) == NULL) | 1225 | dev = __dev_get_by_name(net, ipvs->backup_mcast_ifn); |
1226 | if (!dev) | ||
552 | return -ENODEV; | 1227 | return -ENODEV; |
553 | 1228 | ||
554 | sync_recv_mesg_maxlen = dev->mtu - | 1229 | ipvs->recv_mesg_maxlen = dev->mtu - |
555 | sizeof(struct iphdr) - sizeof(struct udphdr); | 1230 | sizeof(struct iphdr) - sizeof(struct udphdr); |
556 | IP_VS_DBG(7, "setting the maximum length of sync receiving " | 1231 | IP_VS_DBG(7, "setting the maximum length of sync receiving " |
557 | "message %d.\n", sync_recv_mesg_maxlen); | 1232 | "message %d.\n", ipvs->recv_mesg_maxlen); |
558 | } | 1233 | } |
559 | 1234 | ||
560 | return 0; | 1235 | return 0; |
@@ -569,6 +1244,7 @@ static int set_sync_mesg_maxlen(int sync_state) | |||
569 | static int | 1244 | static int |
570 | join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) | 1245 | join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) |
571 | { | 1246 | { |
1247 | struct net *net = sock_net(sk); | ||
572 | struct ip_mreqn mreq; | 1248 | struct ip_mreqn mreq; |
573 | struct net_device *dev; | 1249 | struct net_device *dev; |
574 | int ret; | 1250 | int ret; |
@@ -576,7 +1252,8 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) | |||
576 | memset(&mreq, 0, sizeof(mreq)); | 1252 | memset(&mreq, 0, sizeof(mreq)); |
577 | memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr)); | 1253 | memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr)); |
578 | 1254 | ||
579 | if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) | 1255 | dev = __dev_get_by_name(net, ifname); |
1256 | if (!dev) | ||
580 | return -ENODEV; | 1257 | return -ENODEV; |
581 | if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) | 1258 | if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) |
582 | return -EINVAL; | 1259 | return -EINVAL; |
@@ -593,11 +1270,13 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) | |||
593 | 1270 | ||
594 | static int bind_mcastif_addr(struct socket *sock, char *ifname) | 1271 | static int bind_mcastif_addr(struct socket *sock, char *ifname) |
595 | { | 1272 | { |
1273 | struct net *net = sock_net(sock->sk); | ||
596 | struct net_device *dev; | 1274 | struct net_device *dev; |
597 | __be32 addr; | 1275 | __be32 addr; |
598 | struct sockaddr_in sin; | 1276 | struct sockaddr_in sin; |
599 | 1277 | ||
600 | if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) | 1278 | dev = __dev_get_by_name(net, ifname); |
1279 | if (!dev) | ||
601 | return -ENODEV; | 1280 | return -ENODEV; |
602 | 1281 | ||
603 | addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); | 1282 | addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); |
@@ -619,8 +1298,9 @@ static int bind_mcastif_addr(struct socket *sock, char *ifname) | |||
619 | /* | 1298 | /* |
620 | * Set up sending multicast socket over UDP | 1299 | * Set up sending multicast socket over UDP |
621 | */ | 1300 | */ |
622 | static struct socket * make_send_sock(void) | 1301 | static struct socket *make_send_sock(struct net *net) |
623 | { | 1302 | { |
1303 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
624 | struct socket *sock; | 1304 | struct socket *sock; |
625 | int result; | 1305 | int result; |
626 | 1306 | ||
@@ -631,7 +1311,7 @@ static struct socket * make_send_sock(void) | |||
631 | return ERR_PTR(result); | 1311 | return ERR_PTR(result); |
632 | } | 1312 | } |
633 | 1313 | ||
634 | result = set_mcast_if(sock->sk, ip_vs_master_mcast_ifn); | 1314 | result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn); |
635 | if (result < 0) { | 1315 | if (result < 0) { |
636 | pr_err("Error setting outbound mcast interface\n"); | 1316 | pr_err("Error setting outbound mcast interface\n"); |
637 | goto error; | 1317 | goto error; |
@@ -640,7 +1320,7 @@ static struct socket * make_send_sock(void) | |||
640 | set_mcast_loop(sock->sk, 0); | 1320 | set_mcast_loop(sock->sk, 0); |
641 | set_mcast_ttl(sock->sk, 1); | 1321 | set_mcast_ttl(sock->sk, 1); |
642 | 1322 | ||
643 | result = bind_mcastif_addr(sock, ip_vs_master_mcast_ifn); | 1323 | result = bind_mcastif_addr(sock, ipvs->master_mcast_ifn); |
644 | if (result < 0) { | 1324 | if (result < 0) { |
645 | pr_err("Error binding address of the mcast interface\n"); | 1325 | pr_err("Error binding address of the mcast interface\n"); |
646 | goto error; | 1326 | goto error; |
@@ -664,8 +1344,9 @@ static struct socket * make_send_sock(void) | |||
664 | /* | 1344 | /* |
665 | * Set up receiving multicast socket over UDP | 1345 | * Set up receiving multicast socket over UDP |
666 | */ | 1346 | */ |
667 | static struct socket * make_receive_sock(void) | 1347 | static struct socket *make_receive_sock(struct net *net) |
668 | { | 1348 | { |
1349 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
669 | struct socket *sock; | 1350 | struct socket *sock; |
670 | int result; | 1351 | int result; |
671 | 1352 | ||
@@ -689,7 +1370,7 @@ static struct socket * make_receive_sock(void) | |||
689 | /* join the multicast group */ | 1370 | /* join the multicast group */ |
690 | result = join_mcast_group(sock->sk, | 1371 | result = join_mcast_group(sock->sk, |
691 | (struct in_addr *) &mcast_addr.sin_addr, | 1372 | (struct in_addr *) &mcast_addr.sin_addr, |
692 | ip_vs_backup_mcast_ifn); | 1373 | ipvs->backup_mcast_ifn); |
693 | if (result < 0) { | 1374 | if (result < 0) { |
694 | pr_err("Error joining to the multicast group\n"); | 1375 | pr_err("Error joining to the multicast group\n"); |
695 | goto error; | 1376 | goto error; |
@@ -760,20 +1441,21 @@ ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen) | |||
760 | static int sync_thread_master(void *data) | 1441 | static int sync_thread_master(void *data) |
761 | { | 1442 | { |
762 | struct ip_vs_sync_thread_data *tinfo = data; | 1443 | struct ip_vs_sync_thread_data *tinfo = data; |
1444 | struct netns_ipvs *ipvs = net_ipvs(tinfo->net); | ||
763 | struct ip_vs_sync_buff *sb; | 1445 | struct ip_vs_sync_buff *sb; |
764 | 1446 | ||
765 | pr_info("sync thread started: state = MASTER, mcast_ifn = %s, " | 1447 | pr_info("sync thread started: state = MASTER, mcast_ifn = %s, " |
766 | "syncid = %d\n", | 1448 | "syncid = %d\n", |
767 | ip_vs_master_mcast_ifn, ip_vs_master_syncid); | 1449 | ipvs->master_mcast_ifn, ipvs->master_syncid); |
768 | 1450 | ||
769 | while (!kthread_should_stop()) { | 1451 | while (!kthread_should_stop()) { |
770 | while ((sb = sb_dequeue())) { | 1452 | while ((sb = sb_dequeue(ipvs))) { |
771 | ip_vs_send_sync_msg(tinfo->sock, sb->mesg); | 1453 | ip_vs_send_sync_msg(tinfo->sock, sb->mesg); |
772 | ip_vs_sync_buff_release(sb); | 1454 | ip_vs_sync_buff_release(sb); |
773 | } | 1455 | } |
774 | 1456 | ||
775 | /* check if entries stay in curr_sb for 2 seconds */ | 1457 | /* check if entries stay in ipvs->sync_buff for 2 seconds */ |
776 | sb = get_curr_sync_buff(2 * HZ); | 1458 | sb = get_curr_sync_buff(ipvs, 2 * HZ); |
777 | if (sb) { | 1459 | if (sb) { |
778 | ip_vs_send_sync_msg(tinfo->sock, sb->mesg); | 1460 | ip_vs_send_sync_msg(tinfo->sock, sb->mesg); |
779 | ip_vs_sync_buff_release(sb); | 1461 | ip_vs_sync_buff_release(sb); |
@@ -783,14 +1465,13 @@ static int sync_thread_master(void *data) | |||
783 | } | 1465 | } |
784 | 1466 | ||
785 | /* clean up the sync_buff queue */ | 1467 | /* clean up the sync_buff queue */ |
786 | while ((sb=sb_dequeue())) { | 1468 | while ((sb = sb_dequeue(ipvs))) |
787 | ip_vs_sync_buff_release(sb); | 1469 | ip_vs_sync_buff_release(sb); |
788 | } | ||
789 | 1470 | ||
790 | /* clean up the current sync_buff */ | 1471 | /* clean up the current sync_buff */ |
791 | if ((sb = get_curr_sync_buff(0))) { | 1472 | sb = get_curr_sync_buff(ipvs, 0); |
1473 | if (sb) | ||
792 | ip_vs_sync_buff_release(sb); | 1474 | ip_vs_sync_buff_release(sb); |
793 | } | ||
794 | 1475 | ||
795 | /* release the sending multicast socket */ | 1476 | /* release the sending multicast socket */ |
796 | sock_release(tinfo->sock); | 1477 | sock_release(tinfo->sock); |
@@ -803,11 +1484,12 @@ static int sync_thread_master(void *data) | |||
803 | static int sync_thread_backup(void *data) | 1484 | static int sync_thread_backup(void *data) |
804 | { | 1485 | { |
805 | struct ip_vs_sync_thread_data *tinfo = data; | 1486 | struct ip_vs_sync_thread_data *tinfo = data; |
1487 | struct netns_ipvs *ipvs = net_ipvs(tinfo->net); | ||
806 | int len; | 1488 | int len; |
807 | 1489 | ||
808 | pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, " | 1490 | pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, " |
809 | "syncid = %d\n", | 1491 | "syncid = %d\n", |
810 | ip_vs_backup_mcast_ifn, ip_vs_backup_syncid); | 1492 | ipvs->backup_mcast_ifn, ipvs->backup_syncid); |
811 | 1493 | ||
812 | while (!kthread_should_stop()) { | 1494 | while (!kthread_should_stop()) { |
813 | wait_event_interruptible(*sk_sleep(tinfo->sock->sk), | 1495 | wait_event_interruptible(*sk_sleep(tinfo->sock->sk), |
@@ -817,7 +1499,7 @@ static int sync_thread_backup(void *data) | |||
817 | /* do we have data now? */ | 1499 | /* do we have data now? */ |
818 | while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) { | 1500 | while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) { |
819 | len = ip_vs_receive(tinfo->sock, tinfo->buf, | 1501 | len = ip_vs_receive(tinfo->sock, tinfo->buf, |
820 | sync_recv_mesg_maxlen); | 1502 | ipvs->recv_mesg_maxlen); |
821 | if (len <= 0) { | 1503 | if (len <= 0) { |
822 | pr_err("receiving message error\n"); | 1504 | pr_err("receiving message error\n"); |
823 | break; | 1505 | break; |
@@ -826,7 +1508,7 @@ static int sync_thread_backup(void *data) | |||
826 | /* disable bottom half, because it accesses the data | 1508 | /* disable bottom half, because it accesses the data |
827 | shared by softirq while getting/creating conns */ | 1509 | shared by softirq while getting/creating conns */ |
828 | local_bh_disable(); | 1510 | local_bh_disable(); |
829 | ip_vs_process_message(tinfo->buf, len); | 1511 | ip_vs_process_message(tinfo->net, tinfo->buf, len); |
830 | local_bh_enable(); | 1512 | local_bh_enable(); |
831 | } | 1513 | } |
832 | } | 1514 | } |
@@ -840,41 +1522,42 @@ static int sync_thread_backup(void *data) | |||
840 | } | 1522 | } |
841 | 1523 | ||
842 | 1524 | ||
843 | int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) | 1525 | int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid) |
844 | { | 1526 | { |
845 | struct ip_vs_sync_thread_data *tinfo; | 1527 | struct ip_vs_sync_thread_data *tinfo; |
846 | struct task_struct **realtask, *task; | 1528 | struct task_struct **realtask, *task; |
847 | struct socket *sock; | 1529 | struct socket *sock; |
1530 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
848 | char *name, *buf = NULL; | 1531 | char *name, *buf = NULL; |
849 | int (*threadfn)(void *data); | 1532 | int (*threadfn)(void *data); |
850 | int result = -ENOMEM; | 1533 | int result = -ENOMEM; |
851 | 1534 | ||
852 | IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); | 1535 | IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); |
853 | IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n", | 1536 | IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n", |
854 | sizeof(struct ip_vs_sync_conn)); | 1537 | sizeof(struct ip_vs_sync_conn_v0)); |
855 | 1538 | ||
856 | if (state == IP_VS_STATE_MASTER) { | 1539 | if (state == IP_VS_STATE_MASTER) { |
857 | if (sync_master_thread) | 1540 | if (ipvs->master_thread) |
858 | return -EEXIST; | 1541 | return -EEXIST; |
859 | 1542 | ||
860 | strlcpy(ip_vs_master_mcast_ifn, mcast_ifn, | 1543 | strlcpy(ipvs->master_mcast_ifn, mcast_ifn, |
861 | sizeof(ip_vs_master_mcast_ifn)); | 1544 | sizeof(ipvs->master_mcast_ifn)); |
862 | ip_vs_master_syncid = syncid; | 1545 | ipvs->master_syncid = syncid; |
863 | realtask = &sync_master_thread; | 1546 | realtask = &ipvs->master_thread; |
864 | name = "ipvs_syncmaster"; | 1547 | name = "ipvs_master:%d"; |
865 | threadfn = sync_thread_master; | 1548 | threadfn = sync_thread_master; |
866 | sock = make_send_sock(); | 1549 | sock = make_send_sock(net); |
867 | } else if (state == IP_VS_STATE_BACKUP) { | 1550 | } else if (state == IP_VS_STATE_BACKUP) { |
868 | if (sync_backup_thread) | 1551 | if (ipvs->backup_thread) |
869 | return -EEXIST; | 1552 | return -EEXIST; |
870 | 1553 | ||
871 | strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn, | 1554 | strlcpy(ipvs->backup_mcast_ifn, mcast_ifn, |
872 | sizeof(ip_vs_backup_mcast_ifn)); | 1555 | sizeof(ipvs->backup_mcast_ifn)); |
873 | ip_vs_backup_syncid = syncid; | 1556 | ipvs->backup_syncid = syncid; |
874 | realtask = &sync_backup_thread; | 1557 | realtask = &ipvs->backup_thread; |
875 | name = "ipvs_syncbackup"; | 1558 | name = "ipvs_backup:%d"; |
876 | threadfn = sync_thread_backup; | 1559 | threadfn = sync_thread_backup; |
877 | sock = make_receive_sock(); | 1560 | sock = make_receive_sock(net); |
878 | } else { | 1561 | } else { |
879 | return -EINVAL; | 1562 | return -EINVAL; |
880 | } | 1563 | } |
@@ -884,9 +1567,9 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) | |||
884 | goto out; | 1567 | goto out; |
885 | } | 1568 | } |
886 | 1569 | ||
887 | set_sync_mesg_maxlen(state); | 1570 | set_sync_mesg_maxlen(net, state); |
888 | if (state == IP_VS_STATE_BACKUP) { | 1571 | if (state == IP_VS_STATE_BACKUP) { |
889 | buf = kmalloc(sync_recv_mesg_maxlen, GFP_KERNEL); | 1572 | buf = kmalloc(ipvs->recv_mesg_maxlen, GFP_KERNEL); |
890 | if (!buf) | 1573 | if (!buf) |
891 | goto outsocket; | 1574 | goto outsocket; |
892 | } | 1575 | } |
@@ -895,10 +1578,11 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) | |||
895 | if (!tinfo) | 1578 | if (!tinfo) |
896 | goto outbuf; | 1579 | goto outbuf; |
897 | 1580 | ||
1581 | tinfo->net = net; | ||
898 | tinfo->sock = sock; | 1582 | tinfo->sock = sock; |
899 | tinfo->buf = buf; | 1583 | tinfo->buf = buf; |
900 | 1584 | ||
901 | task = kthread_run(threadfn, tinfo, name); | 1585 | task = kthread_run(threadfn, tinfo, name, ipvs->gen); |
902 | if (IS_ERR(task)) { | 1586 | if (IS_ERR(task)) { |
903 | result = PTR_ERR(task); | 1587 | result = PTR_ERR(task); |
904 | goto outtinfo; | 1588 | goto outtinfo; |
@@ -906,7 +1590,7 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) | |||
906 | 1590 | ||
907 | /* mark as active */ | 1591 | /* mark as active */ |
908 | *realtask = task; | 1592 | *realtask = task; |
909 | ip_vs_sync_state |= state; | 1593 | ipvs->sync_state |= state; |
910 | 1594 | ||
911 | /* increase the module use count */ | 1595 | /* increase the module use count */ |
912 | ip_vs_use_count_inc(); | 1596 | ip_vs_use_count_inc(); |
@@ -924,16 +1608,18 @@ out: | |||
924 | } | 1608 | } |
925 | 1609 | ||
926 | 1610 | ||
927 | int stop_sync_thread(int state) | 1611 | int stop_sync_thread(struct net *net, int state) |
928 | { | 1612 | { |
1613 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
1614 | |||
929 | IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); | 1615 | IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); |
930 | 1616 | ||
931 | if (state == IP_VS_STATE_MASTER) { | 1617 | if (state == IP_VS_STATE_MASTER) { |
932 | if (!sync_master_thread) | 1618 | if (!ipvs->master_thread) |
933 | return -ESRCH; | 1619 | return -ESRCH; |
934 | 1620 | ||
935 | pr_info("stopping master sync thread %d ...\n", | 1621 | pr_info("stopping master sync thread %d ...\n", |
936 | task_pid_nr(sync_master_thread)); | 1622 | task_pid_nr(ipvs->master_thread)); |
937 | 1623 | ||
938 | /* | 1624 | /* |
939 | * The lock synchronizes with sb_queue_tail(), so that we don't | 1625 | * The lock synchronizes with sb_queue_tail(), so that we don't |
@@ -941,21 +1627,21 @@ int stop_sync_thread(int state) | |||
941 | * progress of stopping the master sync daemon. | 1627 | * progress of stopping the master sync daemon. |
942 | */ | 1628 | */ |
943 | 1629 | ||
944 | spin_lock_bh(&ip_vs_sync_lock); | 1630 | spin_lock_bh(&ipvs->sync_lock); |
945 | ip_vs_sync_state &= ~IP_VS_STATE_MASTER; | 1631 | ipvs->sync_state &= ~IP_VS_STATE_MASTER; |
946 | spin_unlock_bh(&ip_vs_sync_lock); | 1632 | spin_unlock_bh(&ipvs->sync_lock); |
947 | kthread_stop(sync_master_thread); | 1633 | kthread_stop(ipvs->master_thread); |
948 | sync_master_thread = NULL; | 1634 | ipvs->master_thread = NULL; |
949 | } else if (state == IP_VS_STATE_BACKUP) { | 1635 | } else if (state == IP_VS_STATE_BACKUP) { |
950 | if (!sync_backup_thread) | 1636 | if (!ipvs->backup_thread) |
951 | return -ESRCH; | 1637 | return -ESRCH; |
952 | 1638 | ||
953 | pr_info("stopping backup sync thread %d ...\n", | 1639 | pr_info("stopping backup sync thread %d ...\n", |
954 | task_pid_nr(sync_backup_thread)); | 1640 | task_pid_nr(ipvs->backup_thread)); |
955 | 1641 | ||
956 | ip_vs_sync_state &= ~IP_VS_STATE_BACKUP; | 1642 | ipvs->sync_state &= ~IP_VS_STATE_BACKUP; |
957 | kthread_stop(sync_backup_thread); | 1643 | kthread_stop(ipvs->backup_thread); |
958 | sync_backup_thread = NULL; | 1644 | ipvs->backup_thread = NULL; |
959 | } else { | 1645 | } else { |
960 | return -EINVAL; | 1646 | return -EINVAL; |
961 | } | 1647 | } |
@@ -965,3 +1651,42 @@ int stop_sync_thread(int state) | |||
965 | 1651 | ||
966 | return 0; | 1652 | return 0; |
967 | } | 1653 | } |
1654 | |||
1655 | /* | ||
1656 | * Initialize data struct for each netns | ||
1657 | */ | ||
1658 | static int __net_init __ip_vs_sync_init(struct net *net) | ||
1659 | { | ||
1660 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
1661 | |||
1662 | INIT_LIST_HEAD(&ipvs->sync_queue); | ||
1663 | spin_lock_init(&ipvs->sync_lock); | ||
1664 | spin_lock_init(&ipvs->sync_buff_lock); | ||
1665 | |||
1666 | ipvs->sync_mcast_addr.sin_family = AF_INET; | ||
1667 | ipvs->sync_mcast_addr.sin_port = cpu_to_be16(IP_VS_SYNC_PORT); | ||
1668 | ipvs->sync_mcast_addr.sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP); | ||
1669 | return 0; | ||
1670 | } | ||
1671 | |||
1672 | static void __ip_vs_sync_cleanup(struct net *net) | ||
1673 | { | ||
1674 | stop_sync_thread(net, IP_VS_STATE_MASTER); | ||
1675 | stop_sync_thread(net, IP_VS_STATE_BACKUP); | ||
1676 | } | ||
1677 | |||
1678 | static struct pernet_operations ipvs_sync_ops = { | ||
1679 | .init = __ip_vs_sync_init, | ||
1680 | .exit = __ip_vs_sync_cleanup, | ||
1681 | }; | ||
1682 | |||
1683 | |||
1684 | int __init ip_vs_sync_init(void) | ||
1685 | { | ||
1686 | return register_pernet_subsys(&ipvs_sync_ops); | ||
1687 | } | ||
1688 | |||
1689 | void __exit ip_vs_sync_cleanup(void) | ||
1690 | { | ||
1691 | unregister_pernet_subsys(&ipvs_sync_ops); | ||
1692 | } | ||
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 5325a3fbe4ac..1f2a4e35fb11 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c | |||
@@ -175,7 +175,6 @@ __ip_vs_reroute_locally(struct sk_buff *skb) | |||
175 | .fl4_tos = RT_TOS(iph->tos), | 175 | .fl4_tos = RT_TOS(iph->tos), |
176 | .mark = skb->mark, | 176 | .mark = skb->mark, |
177 | }; | 177 | }; |
178 | struct rtable *rt; | ||
179 | 178 | ||
180 | if (ip_route_output_key(net, &rt, &fl)) | 179 | if (ip_route_output_key(net, &rt, &fl)) |
181 | return 0; | 180 | return 0; |
@@ -390,7 +389,8 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
390 | 389 | ||
391 | /* MTU checking */ | 390 | /* MTU checking */ |
392 | mtu = dst_mtu(&rt->dst); | 391 | mtu = dst_mtu(&rt->dst); |
393 | if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { | 392 | if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) && |
393 | !skb_is_gso(skb)) { | ||
394 | ip_rt_put(rt); | 394 | ip_rt_put(rt); |
395 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); | 395 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); |
396 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); | 396 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); |
@@ -443,7 +443,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
443 | 443 | ||
444 | /* MTU checking */ | 444 | /* MTU checking */ |
445 | mtu = dst_mtu(&rt->dst); | 445 | mtu = dst_mtu(&rt->dst); |
446 | if (skb->len > mtu) { | 446 | if (skb->len > mtu && !skb_is_gso(skb)) { |
447 | if (!skb->dev) { | 447 | if (!skb->dev) { |
448 | struct net *net = dev_net(skb_dst(skb)->dev); | 448 | struct net *net = dev_net(skb_dst(skb)->dev); |
449 | 449 | ||
@@ -543,7 +543,8 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
543 | 543 | ||
544 | /* MTU checking */ | 544 | /* MTU checking */ |
545 | mtu = dst_mtu(&rt->dst); | 545 | mtu = dst_mtu(&rt->dst); |
546 | if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { | 546 | if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) && |
547 | !skb_is_gso(skb)) { | ||
547 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); | 548 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); |
548 | IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0, | 549 | IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0, |
549 | "ip_vs_nat_xmit(): frag needed for"); | 550 | "ip_vs_nat_xmit(): frag needed for"); |
@@ -658,7 +659,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
658 | 659 | ||
659 | /* MTU checking */ | 660 | /* MTU checking */ |
660 | mtu = dst_mtu(&rt->dst); | 661 | mtu = dst_mtu(&rt->dst); |
661 | if (skb->len > mtu) { | 662 | if (skb->len > mtu && !skb_is_gso(skb)) { |
662 | if (!skb->dev) { | 663 | if (!skb->dev) { |
663 | struct net *net = dev_net(skb_dst(skb)->dev); | 664 | struct net *net = dev_net(skb_dst(skb)->dev); |
664 | 665 | ||
@@ -773,8 +774,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
773 | 774 | ||
774 | df |= (old_iph->frag_off & htons(IP_DF)); | 775 | df |= (old_iph->frag_off & htons(IP_DF)); |
775 | 776 | ||
776 | if ((old_iph->frag_off & htons(IP_DF)) | 777 | if ((old_iph->frag_off & htons(IP_DF) && |
777 | && mtu < ntohs(old_iph->tot_len)) { | 778 | mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb))) { |
778 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); | 779 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); |
779 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); | 780 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); |
780 | goto tx_error_put; | 781 | goto tx_error_put; |
@@ -886,7 +887,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
886 | if (skb_dst(skb)) | 887 | if (skb_dst(skb)) |
887 | skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); | 888 | skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); |
888 | 889 | ||
889 | if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) { | 890 | if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr) && |
891 | !skb_is_gso(skb)) { | ||
890 | if (!skb->dev) { | 892 | if (!skb->dev) { |
891 | struct net *net = dev_net(skb_dst(skb)->dev); | 893 | struct net *net = dev_net(skb_dst(skb)->dev); |
892 | 894 | ||
@@ -991,7 +993,8 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
991 | 993 | ||
992 | /* MTU checking */ | 994 | /* MTU checking */ |
993 | mtu = dst_mtu(&rt->dst); | 995 | mtu = dst_mtu(&rt->dst); |
994 | if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) { | 996 | if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu && |
997 | !skb_is_gso(skb)) { | ||
995 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); | 998 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); |
996 | ip_rt_put(rt); | 999 | ip_rt_put(rt); |
997 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); | 1000 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); |
@@ -1158,7 +1161,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
1158 | 1161 | ||
1159 | /* MTU checking */ | 1162 | /* MTU checking */ |
1160 | mtu = dst_mtu(&rt->dst); | 1163 | mtu = dst_mtu(&rt->dst); |
1161 | if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) { | 1164 | if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) && |
1165 | !skb_is_gso(skb)) { | ||
1162 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); | 1166 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); |
1163 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); | 1167 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); |
1164 | goto tx_error_put; | 1168 | goto tx_error_put; |
@@ -1272,7 +1276,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
1272 | 1276 | ||
1273 | /* MTU checking */ | 1277 | /* MTU checking */ |
1274 | mtu = dst_mtu(&rt->dst); | 1278 | mtu = dst_mtu(&rt->dst); |
1275 | if (skb->len > mtu) { | 1279 | if (skb->len > mtu && !skb_is_gso(skb)) { |
1276 | if (!skb->dev) { | 1280 | if (!skb->dev) { |
1277 | struct net *net = dev_net(skb_dst(skb)->dev); | 1281 | struct net *net = dev_net(skb_dst(skb)->dev); |
1278 | 1282 | ||
diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c new file mode 100644 index 000000000000..4e99cca61612 --- /dev/null +++ b/net/netfilter/nf_conntrack_broadcast.c | |||
@@ -0,0 +1,82 @@ | |||
1 | /* | ||
2 | * broadcast connection tracking helper | ||
3 | * | ||
4 | * (c) 2005 Patrick McHardy <kaber@trash.net> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/module.h> | ||
13 | #include <linux/ip.h> | ||
14 | #include <net/route.h> | ||
15 | #include <linux/inetdevice.h> | ||
16 | #include <linux/skbuff.h> | ||
17 | |||
18 | #include <net/netfilter/nf_conntrack.h> | ||
19 | #include <net/netfilter/nf_conntrack_helper.h> | ||
20 | #include <net/netfilter/nf_conntrack_expect.h> | ||
21 | |||
22 | int nf_conntrack_broadcast_help(struct sk_buff *skb, | ||
23 | unsigned int protoff, | ||
24 | struct nf_conn *ct, | ||
25 | enum ip_conntrack_info ctinfo, | ||
26 | unsigned int timeout) | ||
27 | { | ||
28 | struct nf_conntrack_expect *exp; | ||
29 | struct iphdr *iph = ip_hdr(skb); | ||
30 | struct rtable *rt = skb_rtable(skb); | ||
31 | struct in_device *in_dev; | ||
32 | struct nf_conn_help *help = nfct_help(ct); | ||
33 | __be32 mask = 0; | ||
34 | |||
35 | /* we're only interested in locally generated packets */ | ||
36 | if (skb->sk == NULL) | ||
37 | goto out; | ||
38 | if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST)) | ||
39 | goto out; | ||
40 | if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) | ||
41 | goto out; | ||
42 | |||
43 | rcu_read_lock(); | ||
44 | in_dev = __in_dev_get_rcu(rt->dst.dev); | ||
45 | if (in_dev != NULL) { | ||
46 | for_primary_ifa(in_dev) { | ||
47 | if (ifa->ifa_broadcast == iph->daddr) { | ||
48 | mask = ifa->ifa_mask; | ||
49 | break; | ||
50 | } | ||
51 | } endfor_ifa(in_dev); | ||
52 | } | ||
53 | rcu_read_unlock(); | ||
54 | |||
55 | if (mask == 0) | ||
56 | goto out; | ||
57 | |||
58 | exp = nf_ct_expect_alloc(ct); | ||
59 | if (exp == NULL) | ||
60 | goto out; | ||
61 | |||
62 | exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; | ||
63 | exp->tuple.src.u.udp.port = help->helper->tuple.src.u.udp.port; | ||
64 | |||
65 | exp->mask.src.u3.ip = mask; | ||
66 | exp->mask.src.u.udp.port = htons(0xFFFF); | ||
67 | |||
68 | exp->expectfn = NULL; | ||
69 | exp->flags = NF_CT_EXPECT_PERMANENT; | ||
70 | exp->class = NF_CT_EXPECT_CLASS_DEFAULT; | ||
71 | exp->helper = NULL; | ||
72 | |||
73 | nf_ct_expect_related(exp); | ||
74 | nf_ct_expect_put(exp); | ||
75 | |||
76 | nf_ct_refresh(ct, skb, timeout * HZ); | ||
77 | out: | ||
78 | return NF_ACCEPT; | ||
79 | } | ||
80 | EXPORT_SYMBOL_GPL(nf_conntrack_broadcast_help); | ||
81 | |||
82 | MODULE_LICENSE("GPL"); | ||
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index e61511929c66..1909311c392a 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c | |||
@@ -43,6 +43,7 @@ | |||
43 | #include <net/netfilter/nf_conntrack_acct.h> | 43 | #include <net/netfilter/nf_conntrack_acct.h> |
44 | #include <net/netfilter/nf_conntrack_ecache.h> | 44 | #include <net/netfilter/nf_conntrack_ecache.h> |
45 | #include <net/netfilter/nf_conntrack_zones.h> | 45 | #include <net/netfilter/nf_conntrack_zones.h> |
46 | #include <net/netfilter/nf_conntrack_timestamp.h> | ||
46 | #include <net/netfilter/nf_nat.h> | 47 | #include <net/netfilter/nf_nat.h> |
47 | #include <net/netfilter/nf_nat_core.h> | 48 | #include <net/netfilter/nf_nat_core.h> |
48 | 49 | ||
@@ -282,6 +283,11 @@ EXPORT_SYMBOL_GPL(nf_ct_insert_dying_list); | |||
282 | static void death_by_timeout(unsigned long ul_conntrack) | 283 | static void death_by_timeout(unsigned long ul_conntrack) |
283 | { | 284 | { |
284 | struct nf_conn *ct = (void *)ul_conntrack; | 285 | struct nf_conn *ct = (void *)ul_conntrack; |
286 | struct nf_conn_tstamp *tstamp; | ||
287 | |||
288 | tstamp = nf_conn_tstamp_find(ct); | ||
289 | if (tstamp && tstamp->stop == 0) | ||
290 | tstamp->stop = ktime_to_ns(ktime_get_real()); | ||
285 | 291 | ||
286 | if (!test_bit(IPS_DYING_BIT, &ct->status) && | 292 | if (!test_bit(IPS_DYING_BIT, &ct->status) && |
287 | unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) { | 293 | unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) { |
@@ -419,6 +425,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) | |||
419 | struct nf_conntrack_tuple_hash *h; | 425 | struct nf_conntrack_tuple_hash *h; |
420 | struct nf_conn *ct; | 426 | struct nf_conn *ct; |
421 | struct nf_conn_help *help; | 427 | struct nf_conn_help *help; |
428 | struct nf_conn_tstamp *tstamp; | ||
422 | struct hlist_nulls_node *n; | 429 | struct hlist_nulls_node *n; |
423 | enum ip_conntrack_info ctinfo; | 430 | enum ip_conntrack_info ctinfo; |
424 | struct net *net; | 431 | struct net *net; |
@@ -486,8 +493,16 @@ __nf_conntrack_confirm(struct sk_buff *skb) | |||
486 | ct->timeout.expires += jiffies; | 493 | ct->timeout.expires += jiffies; |
487 | add_timer(&ct->timeout); | 494 | add_timer(&ct->timeout); |
488 | atomic_inc(&ct->ct_general.use); | 495 | atomic_inc(&ct->ct_general.use); |
489 | set_bit(IPS_CONFIRMED_BIT, &ct->status); | 496 | ct->status |= IPS_CONFIRMED; |
497 | |||
498 | /* set conntrack timestamp, if enabled. */ | ||
499 | tstamp = nf_conn_tstamp_find(ct); | ||
500 | if (tstamp) { | ||
501 | if (skb->tstamp.tv64 == 0) | ||
502 | __net_timestamp((struct sk_buff *)skb); | ||
490 | 503 | ||
504 | tstamp->start = ktime_to_ns(skb->tstamp); | ||
505 | } | ||
491 | /* Since the lookup is lockless, hash insertion must be done after | 506 | /* Since the lookup is lockless, hash insertion must be done after |
492 | * starting the timer and setting the CONFIRMED bit. The RCU barriers | 507 | * starting the timer and setting the CONFIRMED bit. The RCU barriers |
493 | * guarantee that no other CPU can find the conntrack before the above | 508 | * guarantee that no other CPU can find the conntrack before the above |
@@ -655,7 +670,8 @@ __nf_conntrack_alloc(struct net *net, u16 zone, | |||
655 | * and ct->tuplehash[IP_CT_DIR_REPLY].hnnode.next unchanged. | 670 | * and ct->tuplehash[IP_CT_DIR_REPLY].hnnode.next unchanged. |
656 | */ | 671 | */ |
657 | memset(&ct->tuplehash[IP_CT_DIR_MAX], 0, | 672 | memset(&ct->tuplehash[IP_CT_DIR_MAX], 0, |
658 | sizeof(*ct) - offsetof(struct nf_conn, tuplehash[IP_CT_DIR_MAX])); | 673 | offsetof(struct nf_conn, proto) - |
674 | offsetof(struct nf_conn, tuplehash[IP_CT_DIR_MAX])); | ||
659 | spin_lock_init(&ct->lock); | 675 | spin_lock_init(&ct->lock); |
660 | ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; | 676 | ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; |
661 | ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL; | 677 | ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL; |
@@ -745,6 +761,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, | |||
745 | } | 761 | } |
746 | 762 | ||
747 | nf_ct_acct_ext_add(ct, GFP_ATOMIC); | 763 | nf_ct_acct_ext_add(ct, GFP_ATOMIC); |
764 | nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); | ||
748 | 765 | ||
749 | ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL; | 766 | ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL; |
750 | nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0, | 767 | nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0, |
@@ -1185,6 +1202,11 @@ struct __nf_ct_flush_report { | |||
1185 | static int kill_report(struct nf_conn *i, void *data) | 1202 | static int kill_report(struct nf_conn *i, void *data) |
1186 | { | 1203 | { |
1187 | struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data; | 1204 | struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data; |
1205 | struct nf_conn_tstamp *tstamp; | ||
1206 | |||
1207 | tstamp = nf_conn_tstamp_find(i); | ||
1208 | if (tstamp && tstamp->stop == 0) | ||
1209 | tstamp->stop = ktime_to_ns(ktime_get_real()); | ||
1188 | 1210 | ||
1189 | /* If we fail to deliver the event, death_by_timeout() will retry */ | 1211 | /* If we fail to deliver the event, death_by_timeout() will retry */ |
1190 | if (nf_conntrack_event_report(IPCT_DESTROY, i, | 1212 | if (nf_conntrack_event_report(IPCT_DESTROY, i, |
@@ -1201,9 +1223,9 @@ static int kill_all(struct nf_conn *i, void *data) | |||
1201 | return 1; | 1223 | return 1; |
1202 | } | 1224 | } |
1203 | 1225 | ||
1204 | void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size) | 1226 | void nf_ct_free_hashtable(void *hash, unsigned int size) |
1205 | { | 1227 | { |
1206 | if (vmalloced) | 1228 | if (is_vmalloc_addr(hash)) |
1207 | vfree(hash); | 1229 | vfree(hash); |
1208 | else | 1230 | else |
1209 | free_pages((unsigned long)hash, | 1231 | free_pages((unsigned long)hash, |
@@ -1270,8 +1292,7 @@ static void nf_conntrack_cleanup_net(struct net *net) | |||
1270 | goto i_see_dead_people; | 1292 | goto i_see_dead_people; |
1271 | } | 1293 | } |
1272 | 1294 | ||
1273 | nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, | 1295 | nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); |
1274 | net->ct.htable_size); | ||
1275 | nf_conntrack_ecache_fini(net); | 1296 | nf_conntrack_ecache_fini(net); |
1276 | nf_conntrack_acct_fini(net); | 1297 | nf_conntrack_acct_fini(net); |
1277 | nf_conntrack_expect_fini(net); | 1298 | nf_conntrack_expect_fini(net); |
@@ -1300,21 +1321,18 @@ void nf_conntrack_cleanup(struct net *net) | |||
1300 | } | 1321 | } |
1301 | } | 1322 | } |
1302 | 1323 | ||
1303 | void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int nulls) | 1324 | void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls) |
1304 | { | 1325 | { |
1305 | struct hlist_nulls_head *hash; | 1326 | struct hlist_nulls_head *hash; |
1306 | unsigned int nr_slots, i; | 1327 | unsigned int nr_slots, i; |
1307 | size_t sz; | 1328 | size_t sz; |
1308 | 1329 | ||
1309 | *vmalloced = 0; | ||
1310 | |||
1311 | BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head)); | 1330 | BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head)); |
1312 | nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head)); | 1331 | nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head)); |
1313 | sz = nr_slots * sizeof(struct hlist_nulls_head); | 1332 | sz = nr_slots * sizeof(struct hlist_nulls_head); |
1314 | hash = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, | 1333 | hash = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, |
1315 | get_order(sz)); | 1334 | get_order(sz)); |
1316 | if (!hash) { | 1335 | if (!hash) { |
1317 | *vmalloced = 1; | ||
1318 | printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n"); | 1336 | printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n"); |
1319 | hash = __vmalloc(sz, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, | 1337 | hash = __vmalloc(sz, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, |
1320 | PAGE_KERNEL); | 1338 | PAGE_KERNEL); |
@@ -1330,7 +1348,7 @@ EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable); | |||
1330 | 1348 | ||
1331 | int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) | 1349 | int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) |
1332 | { | 1350 | { |
1333 | int i, bucket, vmalloced, old_vmalloced; | 1351 | int i, bucket; |
1334 | unsigned int hashsize, old_size; | 1352 | unsigned int hashsize, old_size; |
1335 | struct hlist_nulls_head *hash, *old_hash; | 1353 | struct hlist_nulls_head *hash, *old_hash; |
1336 | struct nf_conntrack_tuple_hash *h; | 1354 | struct nf_conntrack_tuple_hash *h; |
@@ -1347,7 +1365,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) | |||
1347 | if (!hashsize) | 1365 | if (!hashsize) |
1348 | return -EINVAL; | 1366 | return -EINVAL; |
1349 | 1367 | ||
1350 | hash = nf_ct_alloc_hashtable(&hashsize, &vmalloced, 1); | 1368 | hash = nf_ct_alloc_hashtable(&hashsize, 1); |
1351 | if (!hash) | 1369 | if (!hash) |
1352 | return -ENOMEM; | 1370 | return -ENOMEM; |
1353 | 1371 | ||
@@ -1369,15 +1387,13 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) | |||
1369 | } | 1387 | } |
1370 | } | 1388 | } |
1371 | old_size = init_net.ct.htable_size; | 1389 | old_size = init_net.ct.htable_size; |
1372 | old_vmalloced = init_net.ct.hash_vmalloc; | ||
1373 | old_hash = init_net.ct.hash; | 1390 | old_hash = init_net.ct.hash; |
1374 | 1391 | ||
1375 | init_net.ct.htable_size = nf_conntrack_htable_size = hashsize; | 1392 | init_net.ct.htable_size = nf_conntrack_htable_size = hashsize; |
1376 | init_net.ct.hash_vmalloc = vmalloced; | ||
1377 | init_net.ct.hash = hash; | 1393 | init_net.ct.hash = hash; |
1378 | spin_unlock_bh(&nf_conntrack_lock); | 1394 | spin_unlock_bh(&nf_conntrack_lock); |
1379 | 1395 | ||
1380 | nf_ct_free_hashtable(old_hash, old_vmalloced, old_size); | 1396 | nf_ct_free_hashtable(old_hash, old_size); |
1381 | return 0; | 1397 | return 0; |
1382 | } | 1398 | } |
1383 | EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize); | 1399 | EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize); |
@@ -1490,8 +1506,7 @@ static int nf_conntrack_init_net(struct net *net) | |||
1490 | } | 1506 | } |
1491 | 1507 | ||
1492 | net->ct.htable_size = nf_conntrack_htable_size; | 1508 | net->ct.htable_size = nf_conntrack_htable_size; |
1493 | net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, | 1509 | net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, 1); |
1494 | &net->ct.hash_vmalloc, 1); | ||
1495 | if (!net->ct.hash) { | 1510 | if (!net->ct.hash) { |
1496 | ret = -ENOMEM; | 1511 | ret = -ENOMEM; |
1497 | printk(KERN_ERR "Unable to create nf_conntrack_hash\n"); | 1512 | printk(KERN_ERR "Unable to create nf_conntrack_hash\n"); |
@@ -1503,6 +1518,9 @@ static int nf_conntrack_init_net(struct net *net) | |||
1503 | ret = nf_conntrack_acct_init(net); | 1518 | ret = nf_conntrack_acct_init(net); |
1504 | if (ret < 0) | 1519 | if (ret < 0) |
1505 | goto err_acct; | 1520 | goto err_acct; |
1521 | ret = nf_conntrack_tstamp_init(net); | ||
1522 | if (ret < 0) | ||
1523 | goto err_tstamp; | ||
1506 | ret = nf_conntrack_ecache_init(net); | 1524 | ret = nf_conntrack_ecache_init(net); |
1507 | if (ret < 0) | 1525 | if (ret < 0) |
1508 | goto err_ecache; | 1526 | goto err_ecache; |
@@ -1510,12 +1528,13 @@ static int nf_conntrack_init_net(struct net *net) | |||
1510 | return 0; | 1528 | return 0; |
1511 | 1529 | ||
1512 | err_ecache: | 1530 | err_ecache: |
1531 | nf_conntrack_tstamp_fini(net); | ||
1532 | err_tstamp: | ||
1513 | nf_conntrack_acct_fini(net); | 1533 | nf_conntrack_acct_fini(net); |
1514 | err_acct: | 1534 | err_acct: |
1515 | nf_conntrack_expect_fini(net); | 1535 | nf_conntrack_expect_fini(net); |
1516 | err_expect: | 1536 | err_expect: |
1517 | nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, | 1537 | nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); |
1518 | net->ct.htable_size); | ||
1519 | err_hash: | 1538 | err_hash: |
1520 | kmem_cache_destroy(net->ct.nf_conntrack_cachep); | 1539 | kmem_cache_destroy(net->ct.nf_conntrack_cachep); |
1521 | err_cache: | 1540 | err_cache: |
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index a20fb0bd1efe..cd1e8e0970f2 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c | |||
@@ -319,7 +319,8 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) | |||
319 | const struct nf_conntrack_expect_policy *p; | 319 | const struct nf_conntrack_expect_policy *p; |
320 | unsigned int h = nf_ct_expect_dst_hash(&exp->tuple); | 320 | unsigned int h = nf_ct_expect_dst_hash(&exp->tuple); |
321 | 321 | ||
322 | atomic_inc(&exp->use); | 322 | /* two references : one for hash insert, one for the timer */ |
323 | atomic_add(2, &exp->use); | ||
323 | 324 | ||
324 | if (master_help) { | 325 | if (master_help) { |
325 | hlist_add_head(&exp->lnode, &master_help->expectations); | 326 | hlist_add_head(&exp->lnode, &master_help->expectations); |
@@ -333,12 +334,14 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) | |||
333 | setup_timer(&exp->timeout, nf_ct_expectation_timed_out, | 334 | setup_timer(&exp->timeout, nf_ct_expectation_timed_out, |
334 | (unsigned long)exp); | 335 | (unsigned long)exp); |
335 | if (master_help) { | 336 | if (master_help) { |
336 | p = &master_help->helper->expect_policy[exp->class]; | 337 | p = &rcu_dereference_protected( |
338 | master_help->helper, | ||
339 | lockdep_is_held(&nf_conntrack_lock) | ||
340 | )->expect_policy[exp->class]; | ||
337 | exp->timeout.expires = jiffies + p->timeout * HZ; | 341 | exp->timeout.expires = jiffies + p->timeout * HZ; |
338 | } | 342 | } |
339 | add_timer(&exp->timeout); | 343 | add_timer(&exp->timeout); |
340 | 344 | ||
341 | atomic_inc(&exp->use); | ||
342 | NF_CT_STAT_INC(net, expect_create); | 345 | NF_CT_STAT_INC(net, expect_create); |
343 | } | 346 | } |
344 | 347 | ||
@@ -369,7 +372,10 @@ static inline int refresh_timer(struct nf_conntrack_expect *i) | |||
369 | if (!del_timer(&i->timeout)) | 372 | if (!del_timer(&i->timeout)) |
370 | return 0; | 373 | return 0; |
371 | 374 | ||
372 | p = &master_help->helper->expect_policy[i->class]; | 375 | p = &rcu_dereference_protected( |
376 | master_help->helper, | ||
377 | lockdep_is_held(&nf_conntrack_lock) | ||
378 | )->expect_policy[i->class]; | ||
373 | i->timeout.expires = jiffies + p->timeout * HZ; | 379 | i->timeout.expires = jiffies + p->timeout * HZ; |
374 | add_timer(&i->timeout); | 380 | add_timer(&i->timeout); |
375 | return 1; | 381 | return 1; |
@@ -407,7 +413,10 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) | |||
407 | } | 413 | } |
408 | /* Will be over limit? */ | 414 | /* Will be over limit? */ |
409 | if (master_help) { | 415 | if (master_help) { |
410 | p = &master_help->helper->expect_policy[expect->class]; | 416 | p = &rcu_dereference_protected( |
417 | master_help->helper, | ||
418 | lockdep_is_held(&nf_conntrack_lock) | ||
419 | )->expect_policy[expect->class]; | ||
411 | if (p->max_expected && | 420 | if (p->max_expected && |
412 | master_help->expecting[expect->class] >= p->max_expected) { | 421 | master_help->expecting[expect->class] >= p->max_expected) { |
413 | evict_oldest_expect(master, expect); | 422 | evict_oldest_expect(master, expect); |
@@ -478,7 +487,7 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq) | |||
478 | struct hlist_node *n; | 487 | struct hlist_node *n; |
479 | 488 | ||
480 | for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { | 489 | for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { |
481 | n = rcu_dereference(net->ct.expect_hash[st->bucket].first); | 490 | n = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket])); |
482 | if (n) | 491 | if (n) |
483 | return n; | 492 | return n; |
484 | } | 493 | } |
@@ -491,11 +500,11 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq, | |||
491 | struct net *net = seq_file_net(seq); | 500 | struct net *net = seq_file_net(seq); |
492 | struct ct_expect_iter_state *st = seq->private; | 501 | struct ct_expect_iter_state *st = seq->private; |
493 | 502 | ||
494 | head = rcu_dereference(head->next); | 503 | head = rcu_dereference(hlist_next_rcu(head)); |
495 | while (head == NULL) { | 504 | while (head == NULL) { |
496 | if (++st->bucket >= nf_ct_expect_hsize) | 505 | if (++st->bucket >= nf_ct_expect_hsize) |
497 | return NULL; | 506 | return NULL; |
498 | head = rcu_dereference(net->ct.expect_hash[st->bucket].first); | 507 | head = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket])); |
499 | } | 508 | } |
500 | return head; | 509 | return head; |
501 | } | 510 | } |
@@ -630,8 +639,7 @@ int nf_conntrack_expect_init(struct net *net) | |||
630 | } | 639 | } |
631 | 640 | ||
632 | net->ct.expect_count = 0; | 641 | net->ct.expect_count = 0; |
633 | net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, | 642 | net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0); |
634 | &net->ct.expect_vmalloc, 0); | ||
635 | if (net->ct.expect_hash == NULL) | 643 | if (net->ct.expect_hash == NULL) |
636 | goto err1; | 644 | goto err1; |
637 | 645 | ||
@@ -653,8 +661,7 @@ err3: | |||
653 | if (net_eq(net, &init_net)) | 661 | if (net_eq(net, &init_net)) |
654 | kmem_cache_destroy(nf_ct_expect_cachep); | 662 | kmem_cache_destroy(nf_ct_expect_cachep); |
655 | err2: | 663 | err2: |
656 | nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc, | 664 | nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize); |
657 | nf_ct_expect_hsize); | ||
658 | err1: | 665 | err1: |
659 | return err; | 666 | return err; |
660 | } | 667 | } |
@@ -666,6 +673,5 @@ void nf_conntrack_expect_fini(struct net *net) | |||
666 | rcu_barrier(); /* Wait for call_rcu() before destroy */ | 673 | rcu_barrier(); /* Wait for call_rcu() before destroy */ |
667 | kmem_cache_destroy(nf_ct_expect_cachep); | 674 | kmem_cache_destroy(nf_ct_expect_cachep); |
668 | } | 675 | } |
669 | nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc, | 676 | nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize); |
670 | nf_ct_expect_hsize); | ||
671 | } | 677 | } |
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index bd82450c193f..80a23ed62bb0 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c | |||
@@ -140,15 +140,16 @@ static void update_alloc_size(struct nf_ct_ext_type *type) | |||
140 | /* This assumes that extended areas in conntrack for the types | 140 | /* This assumes that extended areas in conntrack for the types |
141 | whose NF_CT_EXT_F_PREALLOC bit set are allocated in order */ | 141 | whose NF_CT_EXT_F_PREALLOC bit set are allocated in order */ |
142 | for (i = min; i <= max; i++) { | 142 | for (i = min; i <= max; i++) { |
143 | t1 = nf_ct_ext_types[i]; | 143 | t1 = rcu_dereference_protected(nf_ct_ext_types[i], |
144 | lockdep_is_held(&nf_ct_ext_type_mutex)); | ||
144 | if (!t1) | 145 | if (!t1) |
145 | continue; | 146 | continue; |
146 | 147 | ||
147 | t1->alloc_size = sizeof(struct nf_ct_ext) | 148 | t1->alloc_size = ALIGN(sizeof(struct nf_ct_ext), t1->align) + |
148 | + ALIGN(sizeof(struct nf_ct_ext), t1->align) | 149 | t1->len; |
149 | + t1->len; | ||
150 | for (j = 0; j < NF_CT_EXT_NUM; j++) { | 150 | for (j = 0; j < NF_CT_EXT_NUM; j++) { |
151 | t2 = nf_ct_ext_types[j]; | 151 | t2 = rcu_dereference_protected(nf_ct_ext_types[j], |
152 | lockdep_is_held(&nf_ct_ext_type_mutex)); | ||
152 | if (t2 == NULL || t2 == t1 || | 153 | if (t2 == NULL || t2 == t1 || |
153 | (t2->flags & NF_CT_EXT_F_PREALLOC) == 0) | 154 | (t2->flags & NF_CT_EXT_F_PREALLOC) == 0) |
154 | continue; | 155 | continue; |
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 59e1a4cd4e8b..1bdfea357955 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c | |||
@@ -33,7 +33,6 @@ static DEFINE_MUTEX(nf_ct_helper_mutex); | |||
33 | static struct hlist_head *nf_ct_helper_hash __read_mostly; | 33 | static struct hlist_head *nf_ct_helper_hash __read_mostly; |
34 | static unsigned int nf_ct_helper_hsize __read_mostly; | 34 | static unsigned int nf_ct_helper_hsize __read_mostly; |
35 | static unsigned int nf_ct_helper_count __read_mostly; | 35 | static unsigned int nf_ct_helper_count __read_mostly; |
36 | static int nf_ct_helper_vmalloc; | ||
37 | 36 | ||
38 | 37 | ||
39 | /* Stupid hash, but collision free for the default registrations of the | 38 | /* Stupid hash, but collision free for the default registrations of the |
@@ -158,7 +157,10 @@ static inline int unhelp(struct nf_conntrack_tuple_hash *i, | |||
158 | struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i); | 157 | struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i); |
159 | struct nf_conn_help *help = nfct_help(ct); | 158 | struct nf_conn_help *help = nfct_help(ct); |
160 | 159 | ||
161 | if (help && help->helper == me) { | 160 | if (help && rcu_dereference_protected( |
161 | help->helper, | ||
162 | lockdep_is_held(&nf_conntrack_lock) | ||
163 | ) == me) { | ||
162 | nf_conntrack_event(IPCT_HELPER, ct); | 164 | nf_conntrack_event(IPCT_HELPER, ct); |
163 | rcu_assign_pointer(help->helper, NULL); | 165 | rcu_assign_pointer(help->helper, NULL); |
164 | } | 166 | } |
@@ -210,7 +212,10 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, | |||
210 | hlist_for_each_entry_safe(exp, n, next, | 212 | hlist_for_each_entry_safe(exp, n, next, |
211 | &net->ct.expect_hash[i], hnode) { | 213 | &net->ct.expect_hash[i], hnode) { |
212 | struct nf_conn_help *help = nfct_help(exp->master); | 214 | struct nf_conn_help *help = nfct_help(exp->master); |
213 | if ((help->helper == me || exp->helper == me) && | 215 | if ((rcu_dereference_protected( |
216 | help->helper, | ||
217 | lockdep_is_held(&nf_conntrack_lock) | ||
218 | ) == me || exp->helper == me) && | ||
214 | del_timer(&exp->timeout)) { | 219 | del_timer(&exp->timeout)) { |
215 | nf_ct_unlink_expect(exp); | 220 | nf_ct_unlink_expect(exp); |
216 | nf_ct_expect_put(exp); | 221 | nf_ct_expect_put(exp); |
@@ -261,8 +266,7 @@ int nf_conntrack_helper_init(void) | |||
261 | int err; | 266 | int err; |
262 | 267 | ||
263 | nf_ct_helper_hsize = 1; /* gets rounded up to use one page */ | 268 | nf_ct_helper_hsize = 1; /* gets rounded up to use one page */ |
264 | nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize, | 269 | nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0); |
265 | &nf_ct_helper_vmalloc, 0); | ||
266 | if (!nf_ct_helper_hash) | 270 | if (!nf_ct_helper_hash) |
267 | return -ENOMEM; | 271 | return -ENOMEM; |
268 | 272 | ||
@@ -273,14 +277,12 @@ int nf_conntrack_helper_init(void) | |||
273 | return 0; | 277 | return 0; |
274 | 278 | ||
275 | err1: | 279 | err1: |
276 | nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_vmalloc, | 280 | nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize); |
277 | nf_ct_helper_hsize); | ||
278 | return err; | 281 | return err; |
279 | } | 282 | } |
280 | 283 | ||
281 | void nf_conntrack_helper_fini(void) | 284 | void nf_conntrack_helper_fini(void) |
282 | { | 285 | { |
283 | nf_ct_extend_unregister(&helper_extend); | 286 | nf_ct_extend_unregister(&helper_extend); |
284 | nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_vmalloc, | 287 | nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize); |
285 | nf_ct_helper_hsize); | ||
286 | } | 288 | } |
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c index aadde018a072..4c8f30a3d6d2 100644 --- a/net/netfilter/nf_conntrack_netbios_ns.c +++ b/net/netfilter/nf_conntrack_netbios_ns.c | |||
@@ -18,14 +18,7 @@ | |||
18 | #include <linux/kernel.h> | 18 | #include <linux/kernel.h> |
19 | #include <linux/module.h> | 19 | #include <linux/module.h> |
20 | #include <linux/init.h> | 20 | #include <linux/init.h> |
21 | #include <linux/skbuff.h> | ||
22 | #include <linux/netdevice.h> | ||
23 | #include <linux/inetdevice.h> | ||
24 | #include <linux/if_addr.h> | ||
25 | #include <linux/in.h> | 21 | #include <linux/in.h> |
26 | #include <linux/ip.h> | ||
27 | #include <linux/netfilter.h> | ||
28 | #include <net/route.h> | ||
29 | 22 | ||
30 | #include <net/netfilter/nf_conntrack.h> | 23 | #include <net/netfilter/nf_conntrack.h> |
31 | #include <net/netfilter/nf_conntrack_helper.h> | 24 | #include <net/netfilter/nf_conntrack_helper.h> |
@@ -40,75 +33,26 @@ MODULE_ALIAS("ip_conntrack_netbios_ns"); | |||
40 | MODULE_ALIAS_NFCT_HELPER("netbios_ns"); | 33 | MODULE_ALIAS_NFCT_HELPER("netbios_ns"); |
41 | 34 | ||
42 | static unsigned int timeout __read_mostly = 3; | 35 | static unsigned int timeout __read_mostly = 3; |
43 | module_param(timeout, uint, 0400); | 36 | module_param(timeout, uint, S_IRUSR); |
44 | MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds"); | 37 | MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds"); |
45 | 38 | ||
46 | static int help(struct sk_buff *skb, unsigned int protoff, | ||
47 | struct nf_conn *ct, enum ip_conntrack_info ctinfo) | ||
48 | { | ||
49 | struct nf_conntrack_expect *exp; | ||
50 | struct iphdr *iph = ip_hdr(skb); | ||
51 | struct rtable *rt = skb_rtable(skb); | ||
52 | struct in_device *in_dev; | ||
53 | __be32 mask = 0; | ||
54 | |||
55 | /* we're only interested in locally generated packets */ | ||
56 | if (skb->sk == NULL) | ||
57 | goto out; | ||
58 | if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST)) | ||
59 | goto out; | ||
60 | if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) | ||
61 | goto out; | ||
62 | |||
63 | rcu_read_lock(); | ||
64 | in_dev = __in_dev_get_rcu(rt->dst.dev); | ||
65 | if (in_dev != NULL) { | ||
66 | for_primary_ifa(in_dev) { | ||
67 | if (ifa->ifa_broadcast == iph->daddr) { | ||
68 | mask = ifa->ifa_mask; | ||
69 | break; | ||
70 | } | ||
71 | } endfor_ifa(in_dev); | ||
72 | } | ||
73 | rcu_read_unlock(); | ||
74 | |||
75 | if (mask == 0) | ||
76 | goto out; | ||
77 | |||
78 | exp = nf_ct_expect_alloc(ct); | ||
79 | if (exp == NULL) | ||
80 | goto out; | ||
81 | |||
82 | exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; | ||
83 | exp->tuple.src.u.udp.port = htons(NMBD_PORT); | ||
84 | |||
85 | exp->mask.src.u3.ip = mask; | ||
86 | exp->mask.src.u.udp.port = htons(0xFFFF); | ||
87 | |||
88 | exp->expectfn = NULL; | ||
89 | exp->flags = NF_CT_EXPECT_PERMANENT; | ||
90 | exp->class = NF_CT_EXPECT_CLASS_DEFAULT; | ||
91 | exp->helper = NULL; | ||
92 | |||
93 | nf_ct_expect_related(exp); | ||
94 | nf_ct_expect_put(exp); | ||
95 | |||
96 | nf_ct_refresh(ct, skb, timeout * HZ); | ||
97 | out: | ||
98 | return NF_ACCEPT; | ||
99 | } | ||
100 | |||
101 | static struct nf_conntrack_expect_policy exp_policy = { | 39 | static struct nf_conntrack_expect_policy exp_policy = { |
102 | .max_expected = 1, | 40 | .max_expected = 1, |
103 | }; | 41 | }; |
104 | 42 | ||
43 | static int netbios_ns_help(struct sk_buff *skb, unsigned int protoff, | ||
44 | struct nf_conn *ct, enum ip_conntrack_info ctinfo) | ||
45 | { | ||
46 | return nf_conntrack_broadcast_help(skb, protoff, ct, ctinfo, timeout); | ||
47 | } | ||
48 | |||
105 | static struct nf_conntrack_helper helper __read_mostly = { | 49 | static struct nf_conntrack_helper helper __read_mostly = { |
106 | .name = "netbios-ns", | 50 | .name = "netbios-ns", |
107 | .tuple.src.l3num = AF_INET, | 51 | .tuple.src.l3num = NFPROTO_IPV4, |
108 | .tuple.src.u.udp.port = cpu_to_be16(NMBD_PORT), | 52 | .tuple.src.u.udp.port = cpu_to_be16(NMBD_PORT), |
109 | .tuple.dst.protonum = IPPROTO_UDP, | 53 | .tuple.dst.protonum = IPPROTO_UDP, |
110 | .me = THIS_MODULE, | 54 | .me = THIS_MODULE, |
111 | .help = help, | 55 | .help = netbios_ns_help, |
112 | .expect_policy = &exp_policy, | 56 | .expect_policy = &exp_policy, |
113 | }; | 57 | }; |
114 | 58 | ||
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 93297aaceb2b..3fec12c570a8 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c | |||
@@ -42,6 +42,7 @@ | |||
42 | #include <net/netfilter/nf_conntrack_tuple.h> | 42 | #include <net/netfilter/nf_conntrack_tuple.h> |
43 | #include <net/netfilter/nf_conntrack_acct.h> | 43 | #include <net/netfilter/nf_conntrack_acct.h> |
44 | #include <net/netfilter/nf_conntrack_zones.h> | 44 | #include <net/netfilter/nf_conntrack_zones.h> |
45 | #include <net/netfilter/nf_conntrack_timestamp.h> | ||
45 | #ifdef CONFIG_NF_NAT_NEEDED | 46 | #ifdef CONFIG_NF_NAT_NEEDED |
46 | #include <net/netfilter/nf_nat_core.h> | 47 | #include <net/netfilter/nf_nat_core.h> |
47 | #include <net/netfilter/nf_nat_protocol.h> | 48 | #include <net/netfilter/nf_nat_protocol.h> |
@@ -230,6 +231,33 @@ nla_put_failure: | |||
230 | return -1; | 231 | return -1; |
231 | } | 232 | } |
232 | 233 | ||
234 | static int | ||
235 | ctnetlink_dump_timestamp(struct sk_buff *skb, const struct nf_conn *ct) | ||
236 | { | ||
237 | struct nlattr *nest_count; | ||
238 | const struct nf_conn_tstamp *tstamp; | ||
239 | |||
240 | tstamp = nf_conn_tstamp_find(ct); | ||
241 | if (!tstamp) | ||
242 | return 0; | ||
243 | |||
244 | nest_count = nla_nest_start(skb, CTA_TIMESTAMP | NLA_F_NESTED); | ||
245 | if (!nest_count) | ||
246 | goto nla_put_failure; | ||
247 | |||
248 | NLA_PUT_BE64(skb, CTA_TIMESTAMP_START, cpu_to_be64(tstamp->start)); | ||
249 | if (tstamp->stop != 0) { | ||
250 | NLA_PUT_BE64(skb, CTA_TIMESTAMP_STOP, | ||
251 | cpu_to_be64(tstamp->stop)); | ||
252 | } | ||
253 | nla_nest_end(skb, nest_count); | ||
254 | |||
255 | return 0; | ||
256 | |||
257 | nla_put_failure: | ||
258 | return -1; | ||
259 | } | ||
260 | |||
233 | #ifdef CONFIG_NF_CONNTRACK_MARK | 261 | #ifdef CONFIG_NF_CONNTRACK_MARK |
234 | static inline int | 262 | static inline int |
235 | ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct) | 263 | ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct) |
@@ -404,6 +432,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, | |||
404 | ctnetlink_dump_timeout(skb, ct) < 0 || | 432 | ctnetlink_dump_timeout(skb, ct) < 0 || |
405 | ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || | 433 | ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || |
406 | ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 || | 434 | ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 || |
435 | ctnetlink_dump_timestamp(skb, ct) < 0 || | ||
407 | ctnetlink_dump_protoinfo(skb, ct) < 0 || | 436 | ctnetlink_dump_protoinfo(skb, ct) < 0 || |
408 | ctnetlink_dump_helpinfo(skb, ct) < 0 || | 437 | ctnetlink_dump_helpinfo(skb, ct) < 0 || |
409 | ctnetlink_dump_mark(skb, ct) < 0 || | 438 | ctnetlink_dump_mark(skb, ct) < 0 || |
@@ -471,6 +500,18 @@ ctnetlink_secctx_size(const struct nf_conn *ct) | |||
471 | } | 500 | } |
472 | 501 | ||
473 | static inline size_t | 502 | static inline size_t |
503 | ctnetlink_timestamp_size(const struct nf_conn *ct) | ||
504 | { | ||
505 | #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP | ||
506 | if (!nf_ct_ext_exist(ct, NF_CT_EXT_TSTAMP)) | ||
507 | return 0; | ||
508 | return nla_total_size(0) + 2 * nla_total_size(sizeof(uint64_t)); | ||
509 | #else | ||
510 | return 0; | ||
511 | #endif | ||
512 | } | ||
513 | |||
514 | static inline size_t | ||
474 | ctnetlink_nlmsg_size(const struct nf_conn *ct) | 515 | ctnetlink_nlmsg_size(const struct nf_conn *ct) |
475 | { | 516 | { |
476 | return NLMSG_ALIGN(sizeof(struct nfgenmsg)) | 517 | return NLMSG_ALIGN(sizeof(struct nfgenmsg)) |
@@ -481,6 +522,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct) | |||
481 | + nla_total_size(sizeof(u_int32_t)) /* CTA_ID */ | 522 | + nla_total_size(sizeof(u_int32_t)) /* CTA_ID */ |
482 | + nla_total_size(sizeof(u_int32_t)) /* CTA_STATUS */ | 523 | + nla_total_size(sizeof(u_int32_t)) /* CTA_STATUS */ |
483 | + ctnetlink_counters_size(ct) | 524 | + ctnetlink_counters_size(ct) |
525 | + ctnetlink_timestamp_size(ct) | ||
484 | + nla_total_size(sizeof(u_int32_t)) /* CTA_TIMEOUT */ | 526 | + nla_total_size(sizeof(u_int32_t)) /* CTA_TIMEOUT */ |
485 | + nla_total_size(0) /* CTA_PROTOINFO */ | 527 | + nla_total_size(0) /* CTA_PROTOINFO */ |
486 | + nla_total_size(0) /* CTA_HELP */ | 528 | + nla_total_size(0) /* CTA_HELP */ |
@@ -571,7 +613,8 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) | |||
571 | 613 | ||
572 | if (events & (1 << IPCT_DESTROY)) { | 614 | if (events & (1 << IPCT_DESTROY)) { |
573 | if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || | 615 | if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || |
574 | ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0) | 616 | ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 || |
617 | ctnetlink_dump_timestamp(skb, ct) < 0) | ||
575 | goto nla_put_failure; | 618 | goto nla_put_failure; |
576 | } else { | 619 | } else { |
577 | if (ctnetlink_dump_timeout(skb, ct) < 0) | 620 | if (ctnetlink_dump_timeout(skb, ct) < 0) |
@@ -1357,6 +1400,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, | |||
1357 | } | 1400 | } |
1358 | 1401 | ||
1359 | nf_ct_acct_ext_add(ct, GFP_ATOMIC); | 1402 | nf_ct_acct_ext_add(ct, GFP_ATOMIC); |
1403 | nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); | ||
1360 | nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC); | 1404 | nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC); |
1361 | /* we must add conntrack extensions before confirmation. */ | 1405 | /* we must add conntrack extensions before confirmation. */ |
1362 | ct->status |= IPS_CONFIRMED; | 1406 | ct->status |= IPS_CONFIRMED; |
@@ -1375,6 +1419,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, | |||
1375 | } | 1419 | } |
1376 | #endif | 1420 | #endif |
1377 | 1421 | ||
1422 | memset(&ct->proto, 0, sizeof(ct->proto)); | ||
1378 | if (cda[CTA_PROTOINFO]) { | 1423 | if (cda[CTA_PROTOINFO]) { |
1379 | err = ctnetlink_change_protoinfo(ct, cda); | 1424 | err = ctnetlink_change_protoinfo(ct, cda); |
1380 | if (err < 0) | 1425 | if (err < 0) |
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index dc7bb74110df..5701c8dd783c 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c | |||
@@ -166,6 +166,7 @@ static void nf_ct_l3proto_unregister_sysctl(struct nf_conntrack_l3proto *l3proto | |||
166 | int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto) | 166 | int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto) |
167 | { | 167 | { |
168 | int ret = 0; | 168 | int ret = 0; |
169 | struct nf_conntrack_l3proto *old; | ||
169 | 170 | ||
170 | if (proto->l3proto >= AF_MAX) | 171 | if (proto->l3proto >= AF_MAX) |
171 | return -EBUSY; | 172 | return -EBUSY; |
@@ -174,7 +175,9 @@ int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto) | |||
174 | return -EINVAL; | 175 | return -EINVAL; |
175 | 176 | ||
176 | mutex_lock(&nf_ct_proto_mutex); | 177 | mutex_lock(&nf_ct_proto_mutex); |
177 | if (nf_ct_l3protos[proto->l3proto] != &nf_conntrack_l3proto_generic) { | 178 | old = rcu_dereference_protected(nf_ct_l3protos[proto->l3proto], |
179 | lockdep_is_held(&nf_ct_proto_mutex)); | ||
180 | if (old != &nf_conntrack_l3proto_generic) { | ||
178 | ret = -EBUSY; | 181 | ret = -EBUSY; |
179 | goto out_unlock; | 182 | goto out_unlock; |
180 | } | 183 | } |
@@ -201,7 +204,9 @@ void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto) | |||
201 | BUG_ON(proto->l3proto >= AF_MAX); | 204 | BUG_ON(proto->l3proto >= AF_MAX); |
202 | 205 | ||
203 | mutex_lock(&nf_ct_proto_mutex); | 206 | mutex_lock(&nf_ct_proto_mutex); |
204 | BUG_ON(nf_ct_l3protos[proto->l3proto] != proto); | 207 | BUG_ON(rcu_dereference_protected(nf_ct_l3protos[proto->l3proto], |
208 | lockdep_is_held(&nf_ct_proto_mutex) | ||
209 | ) != proto); | ||
205 | rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], | 210 | rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], |
206 | &nf_conntrack_l3proto_generic); | 211 | &nf_conntrack_l3proto_generic); |
207 | nf_ct_l3proto_unregister_sysctl(proto); | 212 | nf_ct_l3proto_unregister_sysctl(proto); |
@@ -279,7 +284,7 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto) | |||
279 | mutex_lock(&nf_ct_proto_mutex); | 284 | mutex_lock(&nf_ct_proto_mutex); |
280 | if (!nf_ct_protos[l4proto->l3proto]) { | 285 | if (!nf_ct_protos[l4proto->l3proto]) { |
281 | /* l3proto may be loaded later. */ | 286 | /* l3proto may be loaded later. */ |
282 | struct nf_conntrack_l4proto **proto_array; | 287 | struct nf_conntrack_l4proto __rcu **proto_array; |
283 | int i; | 288 | int i; |
284 | 289 | ||
285 | proto_array = kmalloc(MAX_NF_CT_PROTO * | 290 | proto_array = kmalloc(MAX_NF_CT_PROTO * |
@@ -291,7 +296,7 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto) | |||
291 | } | 296 | } |
292 | 297 | ||
293 | for (i = 0; i < MAX_NF_CT_PROTO; i++) | 298 | for (i = 0; i < MAX_NF_CT_PROTO; i++) |
294 | proto_array[i] = &nf_conntrack_l4proto_generic; | 299 | RCU_INIT_POINTER(proto_array[i], &nf_conntrack_l4proto_generic); |
295 | 300 | ||
296 | /* Before making proto_array visible to lockless readers, | 301 | /* Before making proto_array visible to lockless readers, |
297 | * we must make sure its content is committed to memory. | 302 | * we must make sure its content is committed to memory. |
@@ -299,8 +304,10 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto) | |||
299 | smp_wmb(); | 304 | smp_wmb(); |
300 | 305 | ||
301 | nf_ct_protos[l4proto->l3proto] = proto_array; | 306 | nf_ct_protos[l4proto->l3proto] = proto_array; |
302 | } else if (nf_ct_protos[l4proto->l3proto][l4proto->l4proto] != | 307 | } else if (rcu_dereference_protected( |
303 | &nf_conntrack_l4proto_generic) { | 308 | nf_ct_protos[l4proto->l3proto][l4proto->l4proto], |
309 | lockdep_is_held(&nf_ct_proto_mutex) | ||
310 | ) != &nf_conntrack_l4proto_generic) { | ||
304 | ret = -EBUSY; | 311 | ret = -EBUSY; |
305 | goto out_unlock; | 312 | goto out_unlock; |
306 | } | 313 | } |
@@ -331,7 +338,10 @@ void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto) | |||
331 | BUG_ON(l4proto->l3proto >= PF_MAX); | 338 | BUG_ON(l4proto->l3proto >= PF_MAX); |
332 | 339 | ||
333 | mutex_lock(&nf_ct_proto_mutex); | 340 | mutex_lock(&nf_ct_proto_mutex); |
334 | BUG_ON(nf_ct_protos[l4proto->l3proto][l4proto->l4proto] != l4proto); | 341 | BUG_ON(rcu_dereference_protected( |
342 | nf_ct_protos[l4proto->l3proto][l4proto->l4proto], | ||
343 | lockdep_is_held(&nf_ct_proto_mutex) | ||
344 | ) != l4proto); | ||
335 | rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto], | 345 | rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto], |
336 | &nf_conntrack_l4proto_generic); | 346 | &nf_conntrack_l4proto_generic); |
337 | nf_ct_l4proto_unregister_sysctl(l4proto); | 347 | nf_ct_l4proto_unregister_sysctl(l4proto); |
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 5292560d6d4a..9ae57c57c50e 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c | |||
@@ -452,6 +452,9 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, | |||
452 | ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT; | 452 | ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT; |
453 | ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER; | 453 | ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER; |
454 | ct->proto.dccp.state = CT_DCCP_NONE; | 454 | ct->proto.dccp.state = CT_DCCP_NONE; |
455 | ct->proto.dccp.last_pkt = DCCP_PKT_REQUEST; | ||
456 | ct->proto.dccp.last_dir = IP_CT_DIR_ORIGINAL; | ||
457 | ct->proto.dccp.handshake_seq = 0; | ||
455 | return true; | 458 | return true; |
456 | 459 | ||
457 | out_invalid: | 460 | out_invalid: |
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index c6049c2d5ea8..6f4ee70f460b 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c | |||
@@ -413,6 +413,7 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb, | |||
413 | test_bit(SCTP_CID_COOKIE_ACK, map)) | 413 | test_bit(SCTP_CID_COOKIE_ACK, map)) |
414 | return false; | 414 | return false; |
415 | 415 | ||
416 | memset(&ct->proto.sctp, 0, sizeof(ct->proto.sctp)); | ||
416 | new_state = SCTP_CONNTRACK_MAX; | 417 | new_state = SCTP_CONNTRACK_MAX; |
417 | for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { | 418 | for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { |
418 | /* Don't need lock here: this conntrack not in circulation yet */ | 419 | /* Don't need lock here: this conntrack not in circulation yet */ |
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 3fb2b73b24dc..6f38d0e2ea4a 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c | |||
@@ -1066,9 +1066,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, | |||
1066 | BUG_ON(th == NULL); | 1066 | BUG_ON(th == NULL); |
1067 | 1067 | ||
1068 | /* Don't need lock here: this conntrack not in circulation yet */ | 1068 | /* Don't need lock here: this conntrack not in circulation yet */ |
1069 | new_state | 1069 | new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE]; |
1070 | = tcp_conntracks[0][get_conntrack_index(th)] | ||
1071 | [TCP_CONNTRACK_NONE]; | ||
1072 | 1070 | ||
1073 | /* Invalid: delete conntrack */ | 1071 | /* Invalid: delete conntrack */ |
1074 | if (new_state >= TCP_CONNTRACK_MAX) { | 1072 | if (new_state >= TCP_CONNTRACK_MAX) { |
@@ -1077,6 +1075,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, | |||
1077 | } | 1075 | } |
1078 | 1076 | ||
1079 | if (new_state == TCP_CONNTRACK_SYN_SENT) { | 1077 | if (new_state == TCP_CONNTRACK_SYN_SENT) { |
1078 | memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp)); | ||
1080 | /* SYN packet */ | 1079 | /* SYN packet */ |
1081 | ct->proto.tcp.seen[0].td_end = | 1080 | ct->proto.tcp.seen[0].td_end = |
1082 | segment_seq_plus_len(ntohl(th->seq), skb->len, | 1081 | segment_seq_plus_len(ntohl(th->seq), skb->len, |
@@ -1088,11 +1087,11 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, | |||
1088 | ct->proto.tcp.seen[0].td_end; | 1087 | ct->proto.tcp.seen[0].td_end; |
1089 | 1088 | ||
1090 | tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]); | 1089 | tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]); |
1091 | ct->proto.tcp.seen[1].flags = 0; | ||
1092 | } else if (nf_ct_tcp_loose == 0) { | 1090 | } else if (nf_ct_tcp_loose == 0) { |
1093 | /* Don't try to pick up connections. */ | 1091 | /* Don't try to pick up connections. */ |
1094 | return false; | 1092 | return false; |
1095 | } else { | 1093 | } else { |
1094 | memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp)); | ||
1096 | /* | 1095 | /* |
1097 | * We are in the middle of a connection, | 1096 | * We are in the middle of a connection, |
1098 | * its history is lost for us. | 1097 | * its history is lost for us. |
@@ -1107,7 +1106,6 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, | |||
1107 | ct->proto.tcp.seen[0].td_maxend = | 1106 | ct->proto.tcp.seen[0].td_maxend = |
1108 | ct->proto.tcp.seen[0].td_end + | 1107 | ct->proto.tcp.seen[0].td_end + |
1109 | ct->proto.tcp.seen[0].td_maxwin; | 1108 | ct->proto.tcp.seen[0].td_maxwin; |
1110 | ct->proto.tcp.seen[0].td_scale = 0; | ||
1111 | 1109 | ||
1112 | /* We assume SACK and liberal window checking to handle | 1110 | /* We assume SACK and liberal window checking to handle |
1113 | * window scaling */ | 1111 | * window scaling */ |
@@ -1116,13 +1114,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, | |||
1116 | IP_CT_TCP_FLAG_BE_LIBERAL; | 1114 | IP_CT_TCP_FLAG_BE_LIBERAL; |
1117 | } | 1115 | } |
1118 | 1116 | ||
1119 | ct->proto.tcp.seen[1].td_end = 0; | ||
1120 | ct->proto.tcp.seen[1].td_maxend = 0; | ||
1121 | ct->proto.tcp.seen[1].td_maxwin = 0; | ||
1122 | ct->proto.tcp.seen[1].td_scale = 0; | ||
1123 | |||
1124 | /* tcp_packet will set them */ | 1117 | /* tcp_packet will set them */ |
1125 | ct->proto.tcp.state = TCP_CONNTRACK_NONE; | ||
1126 | ct->proto.tcp.last_index = TCP_NONE_SET; | 1118 | ct->proto.tcp.last_index = TCP_NONE_SET; |
1127 | 1119 | ||
1128 | pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i " | 1120 | pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i " |
diff --git a/net/netfilter/nf_conntrack_snmp.c b/net/netfilter/nf_conntrack_snmp.c new file mode 100644 index 000000000000..6e545e26289e --- /dev/null +++ b/net/netfilter/nf_conntrack_snmp.c | |||
@@ -0,0 +1,77 @@ | |||
1 | /* | ||
2 | * SNMP service broadcast connection tracking helper | ||
3 | * | ||
4 | * (c) 2011 Jiri Olsa <jolsa@redhat.com> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/module.h> | ||
13 | #include <linux/init.h> | ||
14 | #include <linux/in.h> | ||
15 | |||
16 | #include <net/netfilter/nf_conntrack.h> | ||
17 | #include <net/netfilter/nf_conntrack_helper.h> | ||
18 | #include <net/netfilter/nf_conntrack_expect.h> | ||
19 | |||
20 | #define SNMP_PORT 161 | ||
21 | |||
22 | MODULE_AUTHOR("Jiri Olsa <jolsa@redhat.com>"); | ||
23 | MODULE_DESCRIPTION("SNMP service broadcast connection tracking helper"); | ||
24 | MODULE_LICENSE("GPL"); | ||
25 | MODULE_ALIAS_NFCT_HELPER("snmp"); | ||
26 | |||
27 | static unsigned int timeout __read_mostly = 30; | ||
28 | module_param(timeout, uint, S_IRUSR); | ||
29 | MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds"); | ||
30 | |||
31 | int (*nf_nat_snmp_hook)(struct sk_buff *skb, | ||
32 | unsigned int protoff, | ||
33 | struct nf_conn *ct, | ||
34 | enum ip_conntrack_info ctinfo); | ||
35 | EXPORT_SYMBOL_GPL(nf_nat_snmp_hook); | ||
36 | |||
37 | static int snmp_conntrack_help(struct sk_buff *skb, unsigned int protoff, | ||
38 | struct nf_conn *ct, enum ip_conntrack_info ctinfo) | ||
39 | { | ||
40 | typeof(nf_nat_snmp_hook) nf_nat_snmp; | ||
41 | |||
42 | nf_conntrack_broadcast_help(skb, protoff, ct, ctinfo, timeout); | ||
43 | |||
44 | nf_nat_snmp = rcu_dereference(nf_nat_snmp_hook); | ||
45 | if (nf_nat_snmp && ct->status & IPS_NAT_MASK) | ||
46 | return nf_nat_snmp(skb, protoff, ct, ctinfo); | ||
47 | |||
48 | return NF_ACCEPT; | ||
49 | } | ||
50 | |||
51 | static struct nf_conntrack_expect_policy exp_policy = { | ||
52 | .max_expected = 1, | ||
53 | }; | ||
54 | |||
55 | static struct nf_conntrack_helper helper __read_mostly = { | ||
56 | .name = "snmp", | ||
57 | .tuple.src.l3num = NFPROTO_IPV4, | ||
58 | .tuple.src.u.udp.port = cpu_to_be16(SNMP_PORT), | ||
59 | .tuple.dst.protonum = IPPROTO_UDP, | ||
60 | .me = THIS_MODULE, | ||
61 | .help = snmp_conntrack_help, | ||
62 | .expect_policy = &exp_policy, | ||
63 | }; | ||
64 | |||
65 | static int __init nf_conntrack_snmp_init(void) | ||
66 | { | ||
67 | exp_policy.timeout = timeout; | ||
68 | return nf_conntrack_helper_register(&helper); | ||
69 | } | ||
70 | |||
71 | static void __exit nf_conntrack_snmp_fini(void) | ||
72 | { | ||
73 | nf_conntrack_helper_unregister(&helper); | ||
74 | } | ||
75 | |||
76 | module_init(nf_conntrack_snmp_init); | ||
77 | module_exit(nf_conntrack_snmp_fini); | ||
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index b4d7f0f24b27..0ae142825881 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c | |||
@@ -29,6 +29,8 @@ | |||
29 | #include <net/netfilter/nf_conntrack_helper.h> | 29 | #include <net/netfilter/nf_conntrack_helper.h> |
30 | #include <net/netfilter/nf_conntrack_acct.h> | 30 | #include <net/netfilter/nf_conntrack_acct.h> |
31 | #include <net/netfilter/nf_conntrack_zones.h> | 31 | #include <net/netfilter/nf_conntrack_zones.h> |
32 | #include <net/netfilter/nf_conntrack_timestamp.h> | ||
33 | #include <linux/rculist_nulls.h> | ||
32 | 34 | ||
33 | MODULE_LICENSE("GPL"); | 35 | MODULE_LICENSE("GPL"); |
34 | 36 | ||
@@ -45,6 +47,7 @@ EXPORT_SYMBOL_GPL(print_tuple); | |||
45 | struct ct_iter_state { | 47 | struct ct_iter_state { |
46 | struct seq_net_private p; | 48 | struct seq_net_private p; |
47 | unsigned int bucket; | 49 | unsigned int bucket; |
50 | u_int64_t time_now; | ||
48 | }; | 51 | }; |
49 | 52 | ||
50 | static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) | 53 | static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) |
@@ -56,7 +59,7 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) | |||
56 | for (st->bucket = 0; | 59 | for (st->bucket = 0; |
57 | st->bucket < net->ct.htable_size; | 60 | st->bucket < net->ct.htable_size; |
58 | st->bucket++) { | 61 | st->bucket++) { |
59 | n = rcu_dereference(net->ct.hash[st->bucket].first); | 62 | n = rcu_dereference(hlist_nulls_first_rcu(&net->ct.hash[st->bucket])); |
60 | if (!is_a_nulls(n)) | 63 | if (!is_a_nulls(n)) |
61 | return n; | 64 | return n; |
62 | } | 65 | } |
@@ -69,13 +72,15 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, | |||
69 | struct net *net = seq_file_net(seq); | 72 | struct net *net = seq_file_net(seq); |
70 | struct ct_iter_state *st = seq->private; | 73 | struct ct_iter_state *st = seq->private; |
71 | 74 | ||
72 | head = rcu_dereference(head->next); | 75 | head = rcu_dereference(hlist_nulls_next_rcu(head)); |
73 | while (is_a_nulls(head)) { | 76 | while (is_a_nulls(head)) { |
74 | if (likely(get_nulls_value(head) == st->bucket)) { | 77 | if (likely(get_nulls_value(head) == st->bucket)) { |
75 | if (++st->bucket >= net->ct.htable_size) | 78 | if (++st->bucket >= net->ct.htable_size) |
76 | return NULL; | 79 | return NULL; |
77 | } | 80 | } |
78 | head = rcu_dereference(net->ct.hash[st->bucket].first); | 81 | head = rcu_dereference( |
82 | hlist_nulls_first_rcu( | ||
83 | &net->ct.hash[st->bucket])); | ||
79 | } | 84 | } |
80 | return head; | 85 | return head; |
81 | } | 86 | } |
@@ -93,6 +98,9 @@ static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos) | |||
93 | static void *ct_seq_start(struct seq_file *seq, loff_t *pos) | 98 | static void *ct_seq_start(struct seq_file *seq, loff_t *pos) |
94 | __acquires(RCU) | 99 | __acquires(RCU) |
95 | { | 100 | { |
101 | struct ct_iter_state *st = seq->private; | ||
102 | |||
103 | st->time_now = ktime_to_ns(ktime_get_real()); | ||
96 | rcu_read_lock(); | 104 | rcu_read_lock(); |
97 | return ct_get_idx(seq, *pos); | 105 | return ct_get_idx(seq, *pos); |
98 | } | 106 | } |
@@ -132,6 +140,34 @@ static inline int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) | |||
132 | } | 140 | } |
133 | #endif | 141 | #endif |
134 | 142 | ||
143 | #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP | ||
144 | static int ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct) | ||
145 | { | ||
146 | struct ct_iter_state *st = s->private; | ||
147 | struct nf_conn_tstamp *tstamp; | ||
148 | s64 delta_time; | ||
149 | |||
150 | tstamp = nf_conn_tstamp_find(ct); | ||
151 | if (tstamp) { | ||
152 | delta_time = st->time_now - tstamp->start; | ||
153 | if (delta_time > 0) | ||
154 | delta_time = div_s64(delta_time, NSEC_PER_SEC); | ||
155 | else | ||
156 | delta_time = 0; | ||
157 | |||
158 | return seq_printf(s, "delta-time=%llu ", | ||
159 | (unsigned long long)delta_time); | ||
160 | } | ||
161 | return 0; | ||
162 | } | ||
163 | #else | ||
164 | static inline int | ||
165 | ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct) | ||
166 | { | ||
167 | return 0; | ||
168 | } | ||
169 | #endif | ||
170 | |||
135 | /* return 0 on success, 1 in case of error */ | 171 | /* return 0 on success, 1 in case of error */ |
136 | static int ct_seq_show(struct seq_file *s, void *v) | 172 | static int ct_seq_show(struct seq_file *s, void *v) |
137 | { | 173 | { |
@@ -200,6 +236,9 @@ static int ct_seq_show(struct seq_file *s, void *v) | |||
200 | goto release; | 236 | goto release; |
201 | #endif | 237 | #endif |
202 | 238 | ||
239 | if (ct_show_delta_time(s, ct)) | ||
240 | goto release; | ||
241 | |||
203 | if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use))) | 242 | if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use))) |
204 | goto release; | 243 | goto release; |
205 | 244 | ||
diff --git a/net/netfilter/nf_conntrack_timestamp.c b/net/netfilter/nf_conntrack_timestamp.c new file mode 100644 index 000000000000..af7dd31af0a1 --- /dev/null +++ b/net/netfilter/nf_conntrack_timestamp.c | |||
@@ -0,0 +1,120 @@ | |||
1 | /* | ||
2 | * (C) 2010 Pablo Neira Ayuso <pablo@netfilter.org> | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation (or any later at your option). | ||
7 | */ | ||
8 | |||
9 | #include <linux/netfilter.h> | ||
10 | #include <linux/slab.h> | ||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/moduleparam.h> | ||
13 | |||
14 | #include <net/netfilter/nf_conntrack.h> | ||
15 | #include <net/netfilter/nf_conntrack_extend.h> | ||
16 | #include <net/netfilter/nf_conntrack_timestamp.h> | ||
17 | |||
18 | static int nf_ct_tstamp __read_mostly; | ||
19 | |||
20 | module_param_named(tstamp, nf_ct_tstamp, bool, 0644); | ||
21 | MODULE_PARM_DESC(tstamp, "Enable connection tracking flow timestamping."); | ||
22 | |||
23 | #ifdef CONFIG_SYSCTL | ||
24 | static struct ctl_table tstamp_sysctl_table[] = { | ||
25 | { | ||
26 | .procname = "nf_conntrack_timestamp", | ||
27 | .data = &init_net.ct.sysctl_tstamp, | ||
28 | .maxlen = sizeof(unsigned int), | ||
29 | .mode = 0644, | ||
30 | .proc_handler = proc_dointvec, | ||
31 | }, | ||
32 | {} | ||
33 | }; | ||
34 | #endif /* CONFIG_SYSCTL */ | ||
35 | |||
36 | static struct nf_ct_ext_type tstamp_extend __read_mostly = { | ||
37 | .len = sizeof(struct nf_conn_tstamp), | ||
38 | .align = __alignof__(struct nf_conn_tstamp), | ||
39 | .id = NF_CT_EXT_TSTAMP, | ||
40 | }; | ||
41 | |||
42 | #ifdef CONFIG_SYSCTL | ||
43 | static int nf_conntrack_tstamp_init_sysctl(struct net *net) | ||
44 | { | ||
45 | struct ctl_table *table; | ||
46 | |||
47 | table = kmemdup(tstamp_sysctl_table, sizeof(tstamp_sysctl_table), | ||
48 | GFP_KERNEL); | ||
49 | if (!table) | ||
50 | goto out; | ||
51 | |||
52 | table[0].data = &net->ct.sysctl_tstamp; | ||
53 | |||
54 | net->ct.tstamp_sysctl_header = register_net_sysctl_table(net, | ||
55 | nf_net_netfilter_sysctl_path, table); | ||
56 | if (!net->ct.tstamp_sysctl_header) { | ||
57 | printk(KERN_ERR "nf_ct_tstamp: can't register to sysctl.\n"); | ||
58 | goto out_register; | ||
59 | } | ||
60 | return 0; | ||
61 | |||
62 | out_register: | ||
63 | kfree(table); | ||
64 | out: | ||
65 | return -ENOMEM; | ||
66 | } | ||
67 | |||
68 | static void nf_conntrack_tstamp_fini_sysctl(struct net *net) | ||
69 | { | ||
70 | struct ctl_table *table; | ||
71 | |||
72 | table = net->ct.tstamp_sysctl_header->ctl_table_arg; | ||
73 | unregister_net_sysctl_table(net->ct.tstamp_sysctl_header); | ||
74 | kfree(table); | ||
75 | } | ||
76 | #else | ||
77 | static int nf_conntrack_tstamp_init_sysctl(struct net *net) | ||
78 | { | ||
79 | return 0; | ||
80 | } | ||
81 | |||
82 | static void nf_conntrack_tstamp_fini_sysctl(struct net *net) | ||
83 | { | ||
84 | } | ||
85 | #endif | ||
86 | |||
87 | int nf_conntrack_tstamp_init(struct net *net) | ||
88 | { | ||
89 | int ret; | ||
90 | |||
91 | net->ct.sysctl_tstamp = nf_ct_tstamp; | ||
92 | |||
93 | if (net_eq(net, &init_net)) { | ||
94 | ret = nf_ct_extend_register(&tstamp_extend); | ||
95 | if (ret < 0) { | ||
96 | printk(KERN_ERR "nf_ct_tstamp: Unable to register " | ||
97 | "extension\n"); | ||
98 | goto out_extend_register; | ||
99 | } | ||
100 | } | ||
101 | |||
102 | ret = nf_conntrack_tstamp_init_sysctl(net); | ||
103 | if (ret < 0) | ||
104 | goto out_sysctl; | ||
105 | |||
106 | return 0; | ||
107 | |||
108 | out_sysctl: | ||
109 | if (net_eq(net, &init_net)) | ||
110 | nf_ct_extend_unregister(&tstamp_extend); | ||
111 | out_extend_register: | ||
112 | return ret; | ||
113 | } | ||
114 | |||
115 | void nf_conntrack_tstamp_fini(struct net *net) | ||
116 | { | ||
117 | nf_conntrack_tstamp_fini_sysctl(net); | ||
118 | if (net_eq(net, &init_net)) | ||
119 | nf_ct_extend_unregister(&tstamp_extend); | ||
120 | } | ||
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index b07393eab88e..20c775cff2a8 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c | |||
@@ -161,7 +161,8 @@ static int seq_show(struct seq_file *s, void *v) | |||
161 | struct nf_logger *t; | 161 | struct nf_logger *t; |
162 | int ret; | 162 | int ret; |
163 | 163 | ||
164 | logger = nf_loggers[*pos]; | 164 | logger = rcu_dereference_protected(nf_loggers[*pos], |
165 | lockdep_is_held(&nf_log_mutex)); | ||
165 | 166 | ||
166 | if (!logger) | 167 | if (!logger) |
167 | ret = seq_printf(s, "%2lld NONE (", *pos); | 168 | ret = seq_printf(s, "%2lld NONE (", *pos); |
@@ -249,7 +250,8 @@ static int nf_log_proc_dostring(ctl_table *table, int write, | |||
249 | mutex_unlock(&nf_log_mutex); | 250 | mutex_unlock(&nf_log_mutex); |
250 | } else { | 251 | } else { |
251 | mutex_lock(&nf_log_mutex); | 252 | mutex_lock(&nf_log_mutex); |
252 | logger = nf_loggers[tindex]; | 253 | logger = rcu_dereference_protected(nf_loggers[tindex], |
254 | lockdep_is_held(&nf_log_mutex)); | ||
253 | if (!logger) | 255 | if (!logger) |
254 | table->data = "NONE"; | 256 | table->data = "NONE"; |
255 | else | 257 | else |
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 74aebed5bd28..5ab22e2bbd7d 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c | |||
@@ -27,14 +27,17 @@ static DEFINE_MUTEX(queue_handler_mutex); | |||
27 | int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh) | 27 | int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh) |
28 | { | 28 | { |
29 | int ret; | 29 | int ret; |
30 | const struct nf_queue_handler *old; | ||
30 | 31 | ||
31 | if (pf >= ARRAY_SIZE(queue_handler)) | 32 | if (pf >= ARRAY_SIZE(queue_handler)) |
32 | return -EINVAL; | 33 | return -EINVAL; |
33 | 34 | ||
34 | mutex_lock(&queue_handler_mutex); | 35 | mutex_lock(&queue_handler_mutex); |
35 | if (queue_handler[pf] == qh) | 36 | old = rcu_dereference_protected(queue_handler[pf], |
37 | lockdep_is_held(&queue_handler_mutex)); | ||
38 | if (old == qh) | ||
36 | ret = -EEXIST; | 39 | ret = -EEXIST; |
37 | else if (queue_handler[pf]) | 40 | else if (old) |
38 | ret = -EBUSY; | 41 | ret = -EBUSY; |
39 | else { | 42 | else { |
40 | rcu_assign_pointer(queue_handler[pf], qh); | 43 | rcu_assign_pointer(queue_handler[pf], qh); |
@@ -49,11 +52,15 @@ EXPORT_SYMBOL(nf_register_queue_handler); | |||
49 | /* The caller must flush their queue before this */ | 52 | /* The caller must flush their queue before this */ |
50 | int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh) | 53 | int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh) |
51 | { | 54 | { |
55 | const struct nf_queue_handler *old; | ||
56 | |||
52 | if (pf >= ARRAY_SIZE(queue_handler)) | 57 | if (pf >= ARRAY_SIZE(queue_handler)) |
53 | return -EINVAL; | 58 | return -EINVAL; |
54 | 59 | ||
55 | mutex_lock(&queue_handler_mutex); | 60 | mutex_lock(&queue_handler_mutex); |
56 | if (queue_handler[pf] && queue_handler[pf] != qh) { | 61 | old = rcu_dereference_protected(queue_handler[pf], |
62 | lockdep_is_held(&queue_handler_mutex)); | ||
63 | if (old && old != qh) { | ||
57 | mutex_unlock(&queue_handler_mutex); | 64 | mutex_unlock(&queue_handler_mutex); |
58 | return -EINVAL; | 65 | return -EINVAL; |
59 | } | 66 | } |
@@ -73,7 +80,10 @@ void nf_unregister_queue_handlers(const struct nf_queue_handler *qh) | |||
73 | 80 | ||
74 | mutex_lock(&queue_handler_mutex); | 81 | mutex_lock(&queue_handler_mutex); |
75 | for (pf = 0; pf < ARRAY_SIZE(queue_handler); pf++) { | 82 | for (pf = 0; pf < ARRAY_SIZE(queue_handler); pf++) { |
76 | if (queue_handler[pf] == qh) | 83 | if (rcu_dereference_protected( |
84 | queue_handler[pf], | ||
85 | lockdep_is_held(&queue_handler_mutex) | ||
86 | ) == qh) | ||
77 | rcu_assign_pointer(queue_handler[pf], NULL); | 87 | rcu_assign_pointer(queue_handler[pf], NULL); |
78 | } | 88 | } |
79 | mutex_unlock(&queue_handler_mutex); | 89 | mutex_unlock(&queue_handler_mutex); |
@@ -115,7 +125,7 @@ static int __nf_queue(struct sk_buff *skb, | |||
115 | int (*okfn)(struct sk_buff *), | 125 | int (*okfn)(struct sk_buff *), |
116 | unsigned int queuenum) | 126 | unsigned int queuenum) |
117 | { | 127 | { |
118 | int status; | 128 | int status = -ENOENT; |
119 | struct nf_queue_entry *entry = NULL; | 129 | struct nf_queue_entry *entry = NULL; |
120 | #ifdef CONFIG_BRIDGE_NETFILTER | 130 | #ifdef CONFIG_BRIDGE_NETFILTER |
121 | struct net_device *physindev; | 131 | struct net_device *physindev; |
@@ -128,16 +138,20 @@ static int __nf_queue(struct sk_buff *skb, | |||
128 | rcu_read_lock(); | 138 | rcu_read_lock(); |
129 | 139 | ||
130 | qh = rcu_dereference(queue_handler[pf]); | 140 | qh = rcu_dereference(queue_handler[pf]); |
131 | if (!qh) | 141 | if (!qh) { |
142 | status = -ESRCH; | ||
132 | goto err_unlock; | 143 | goto err_unlock; |
144 | } | ||
133 | 145 | ||
134 | afinfo = nf_get_afinfo(pf); | 146 | afinfo = nf_get_afinfo(pf); |
135 | if (!afinfo) | 147 | if (!afinfo) |
136 | goto err_unlock; | 148 | goto err_unlock; |
137 | 149 | ||
138 | entry = kmalloc(sizeof(*entry) + afinfo->route_key_size, GFP_ATOMIC); | 150 | entry = kmalloc(sizeof(*entry) + afinfo->route_key_size, GFP_ATOMIC); |
139 | if (!entry) | 151 | if (!entry) { |
152 | status = -ENOMEM; | ||
140 | goto err_unlock; | 153 | goto err_unlock; |
154 | } | ||
141 | 155 | ||
142 | *entry = (struct nf_queue_entry) { | 156 | *entry = (struct nf_queue_entry) { |
143 | .skb = skb, | 157 | .skb = skb, |
@@ -151,11 +165,9 @@ static int __nf_queue(struct sk_buff *skb, | |||
151 | 165 | ||
152 | /* If it's going away, ignore hook. */ | 166 | /* If it's going away, ignore hook. */ |
153 | if (!try_module_get(entry->elem->owner)) { | 167 | if (!try_module_get(entry->elem->owner)) { |
154 | rcu_read_unlock(); | 168 | status = -ECANCELED; |
155 | kfree(entry); | 169 | goto err_unlock; |
156 | return 0; | ||
157 | } | 170 | } |
158 | |||
159 | /* Bump dev refs so they don't vanish while packet is out */ | 171 | /* Bump dev refs so they don't vanish while packet is out */ |
160 | if (indev) | 172 | if (indev) |
161 | dev_hold(indev); | 173 | dev_hold(indev); |
@@ -182,14 +194,13 @@ static int __nf_queue(struct sk_buff *skb, | |||
182 | goto err; | 194 | goto err; |
183 | } | 195 | } |
184 | 196 | ||
185 | return 1; | 197 | return 0; |
186 | 198 | ||
187 | err_unlock: | 199 | err_unlock: |
188 | rcu_read_unlock(); | 200 | rcu_read_unlock(); |
189 | err: | 201 | err: |
190 | kfree_skb(skb); | ||
191 | kfree(entry); | 202 | kfree(entry); |
192 | return 1; | 203 | return status; |
193 | } | 204 | } |
194 | 205 | ||
195 | int nf_queue(struct sk_buff *skb, | 206 | int nf_queue(struct sk_buff *skb, |
@@ -201,6 +212,8 @@ int nf_queue(struct sk_buff *skb, | |||
201 | unsigned int queuenum) | 212 | unsigned int queuenum) |
202 | { | 213 | { |
203 | struct sk_buff *segs; | 214 | struct sk_buff *segs; |
215 | int err; | ||
216 | unsigned int queued; | ||
204 | 217 | ||
205 | if (!skb_is_gso(skb)) | 218 | if (!skb_is_gso(skb)) |
206 | return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn, | 219 | return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn, |
@@ -216,20 +229,35 @@ int nf_queue(struct sk_buff *skb, | |||
216 | } | 229 | } |
217 | 230 | ||
218 | segs = skb_gso_segment(skb, 0); | 231 | segs = skb_gso_segment(skb, 0); |
219 | kfree_skb(skb); | 232 | /* Does not use PTR_ERR to limit the number of error codes that can be |
233 | * returned by nf_queue. For instance, callers rely on -ECANCELED to mean | ||
234 | * 'ignore this hook'. | ||
235 | */ | ||
220 | if (IS_ERR(segs)) | 236 | if (IS_ERR(segs)) |
221 | return 1; | 237 | return -EINVAL; |
222 | 238 | ||
239 | queued = 0; | ||
240 | err = 0; | ||
223 | do { | 241 | do { |
224 | struct sk_buff *nskb = segs->next; | 242 | struct sk_buff *nskb = segs->next; |
225 | 243 | ||
226 | segs->next = NULL; | 244 | segs->next = NULL; |
227 | if (!__nf_queue(segs, elem, pf, hook, indev, outdev, okfn, | 245 | if (err == 0) |
228 | queuenum)) | 246 | err = __nf_queue(segs, elem, pf, hook, indev, |
247 | outdev, okfn, queuenum); | ||
248 | if (err == 0) | ||
249 | queued++; | ||
250 | else | ||
229 | kfree_skb(segs); | 251 | kfree_skb(segs); |
230 | segs = nskb; | 252 | segs = nskb; |
231 | } while (segs); | 253 | } while (segs); |
232 | return 1; | 254 | |
255 | /* also free orig skb if only some segments were queued */ | ||
256 | if (unlikely(err && queued)) | ||
257 | err = 0; | ||
258 | if (err == 0) | ||
259 | kfree_skb(skb); | ||
260 | return err; | ||
233 | } | 261 | } |
234 | 262 | ||
235 | void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) | 263 | void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) |
@@ -237,6 +265,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) | |||
237 | struct sk_buff *skb = entry->skb; | 265 | struct sk_buff *skb = entry->skb; |
238 | struct list_head *elem = &entry->elem->list; | 266 | struct list_head *elem = &entry->elem->list; |
239 | const struct nf_afinfo *afinfo; | 267 | const struct nf_afinfo *afinfo; |
268 | int err; | ||
240 | 269 | ||
241 | rcu_read_lock(); | 270 | rcu_read_lock(); |
242 | 271 | ||
@@ -270,10 +299,17 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) | |||
270 | local_bh_enable(); | 299 | local_bh_enable(); |
271 | break; | 300 | break; |
272 | case NF_QUEUE: | 301 | case NF_QUEUE: |
273 | if (!__nf_queue(skb, elem, entry->pf, entry->hook, | 302 | err = __nf_queue(skb, elem, entry->pf, entry->hook, |
274 | entry->indev, entry->outdev, entry->okfn, | 303 | entry->indev, entry->outdev, entry->okfn, |
275 | verdict >> NF_VERDICT_BITS)) | 304 | verdict >> NF_VERDICT_QBITS); |
276 | goto next_hook; | 305 | if (err < 0) { |
306 | if (err == -ECANCELED) | ||
307 | goto next_hook; | ||
308 | if (err == -ESRCH && | ||
309 | (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) | ||
310 | goto next_hook; | ||
311 | kfree_skb(skb); | ||
312 | } | ||
277 | break; | 313 | break; |
278 | case NF_STOLEN: | 314 | case NF_STOLEN: |
279 | default: | 315 | default: |
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 6a1572b0ab41..91592da504b9 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c | |||
@@ -874,19 +874,19 @@ static struct hlist_node *get_first(struct iter_state *st) | |||
874 | 874 | ||
875 | for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) { | 875 | for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) { |
876 | if (!hlist_empty(&instance_table[st->bucket])) | 876 | if (!hlist_empty(&instance_table[st->bucket])) |
877 | return rcu_dereference_bh(instance_table[st->bucket].first); | 877 | return rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket])); |
878 | } | 878 | } |
879 | return NULL; | 879 | return NULL; |
880 | } | 880 | } |
881 | 881 | ||
882 | static struct hlist_node *get_next(struct iter_state *st, struct hlist_node *h) | 882 | static struct hlist_node *get_next(struct iter_state *st, struct hlist_node *h) |
883 | { | 883 | { |
884 | h = rcu_dereference_bh(h->next); | 884 | h = rcu_dereference_bh(hlist_next_rcu(h)); |
885 | while (!h) { | 885 | while (!h) { |
886 | if (++st->bucket >= INSTANCE_BUCKETS) | 886 | if (++st->bucket >= INSTANCE_BUCKETS) |
887 | return NULL; | 887 | return NULL; |
888 | 888 | ||
889 | h = rcu_dereference_bh(instance_table[st->bucket].first); | 889 | h = rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket])); |
890 | } | 890 | } |
891 | return h; | 891 | return h; |
892 | } | 892 | } |
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 68e67d19724d..b83123f12b42 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c | |||
@@ -387,25 +387,31 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) | |||
387 | { | 387 | { |
388 | struct sk_buff *nskb; | 388 | struct sk_buff *nskb; |
389 | struct nfqnl_instance *queue; | 389 | struct nfqnl_instance *queue; |
390 | int err; | 390 | int err = -ENOBUFS; |
391 | 391 | ||
392 | /* rcu_read_lock()ed by nf_hook_slow() */ | 392 | /* rcu_read_lock()ed by nf_hook_slow() */ |
393 | queue = instance_lookup(queuenum); | 393 | queue = instance_lookup(queuenum); |
394 | if (!queue) | 394 | if (!queue) { |
395 | err = -ESRCH; | ||
395 | goto err_out; | 396 | goto err_out; |
397 | } | ||
396 | 398 | ||
397 | if (queue->copy_mode == NFQNL_COPY_NONE) | 399 | if (queue->copy_mode == NFQNL_COPY_NONE) { |
400 | err = -EINVAL; | ||
398 | goto err_out; | 401 | goto err_out; |
402 | } | ||
399 | 403 | ||
400 | nskb = nfqnl_build_packet_message(queue, entry); | 404 | nskb = nfqnl_build_packet_message(queue, entry); |
401 | if (nskb == NULL) | 405 | if (nskb == NULL) { |
406 | err = -ENOMEM; | ||
402 | goto err_out; | 407 | goto err_out; |
403 | 408 | } | |
404 | spin_lock_bh(&queue->lock); | 409 | spin_lock_bh(&queue->lock); |
405 | 410 | ||
406 | if (!queue->peer_pid) | 411 | if (!queue->peer_pid) { |
412 | err = -EINVAL; | ||
407 | goto err_out_free_nskb; | 413 | goto err_out_free_nskb; |
408 | 414 | } | |
409 | if (queue->queue_total >= queue->queue_maxlen) { | 415 | if (queue->queue_total >= queue->queue_maxlen) { |
410 | queue->queue_dropped++; | 416 | queue->queue_dropped++; |
411 | if (net_ratelimit()) | 417 | if (net_ratelimit()) |
@@ -432,7 +438,7 @@ err_out_free_nskb: | |||
432 | err_out_unlock: | 438 | err_out_unlock: |
433 | spin_unlock_bh(&queue->lock); | 439 | spin_unlock_bh(&queue->lock); |
434 | err_out: | 440 | err_out: |
435 | return -1; | 441 | return err; |
436 | } | 442 | } |
437 | 443 | ||
438 | static int | 444 | static int |
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index c94237631077..0a77d2ff2154 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <linux/mutex.h> | 23 | #include <linux/mutex.h> |
24 | #include <linux/mm.h> | 24 | #include <linux/mm.h> |
25 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
26 | #include <linux/audit.h> | ||
26 | #include <net/net_namespace.h> | 27 | #include <net/net_namespace.h> |
27 | 28 | ||
28 | #include <linux/netfilter/x_tables.h> | 29 | #include <linux/netfilter/x_tables.h> |
@@ -38,9 +39,8 @@ MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module"); | |||
38 | #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) | 39 | #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) |
39 | 40 | ||
40 | struct compat_delta { | 41 | struct compat_delta { |
41 | struct compat_delta *next; | 42 | unsigned int offset; /* offset in kernel */ |
42 | unsigned int offset; | 43 | int delta; /* delta in 32bit user land */ |
43 | int delta; | ||
44 | }; | 44 | }; |
45 | 45 | ||
46 | struct xt_af { | 46 | struct xt_af { |
@@ -49,7 +49,9 @@ struct xt_af { | |||
49 | struct list_head target; | 49 | struct list_head target; |
50 | #ifdef CONFIG_COMPAT | 50 | #ifdef CONFIG_COMPAT |
51 | struct mutex compat_mutex; | 51 | struct mutex compat_mutex; |
52 | struct compat_delta *compat_offsets; | 52 | struct compat_delta *compat_tab; |
53 | unsigned int number; /* number of slots in compat_tab[] */ | ||
54 | unsigned int cur; /* number of used slots in compat_tab[] */ | ||
53 | #endif | 55 | #endif |
54 | }; | 56 | }; |
55 | 57 | ||
@@ -414,54 +416,67 @@ int xt_check_match(struct xt_mtchk_param *par, | |||
414 | EXPORT_SYMBOL_GPL(xt_check_match); | 416 | EXPORT_SYMBOL_GPL(xt_check_match); |
415 | 417 | ||
416 | #ifdef CONFIG_COMPAT | 418 | #ifdef CONFIG_COMPAT |
417 | int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta) | 419 | int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta) |
418 | { | 420 | { |
419 | struct compat_delta *tmp; | 421 | struct xt_af *xp = &xt[af]; |
420 | 422 | ||
421 | tmp = kmalloc(sizeof(struct compat_delta), GFP_KERNEL); | 423 | if (!xp->compat_tab) { |
422 | if (!tmp) | 424 | if (!xp->number) |
423 | return -ENOMEM; | 425 | return -EINVAL; |
426 | xp->compat_tab = vmalloc(sizeof(struct compat_delta) * xp->number); | ||
427 | if (!xp->compat_tab) | ||
428 | return -ENOMEM; | ||
429 | xp->cur = 0; | ||
430 | } | ||
424 | 431 | ||
425 | tmp->offset = offset; | 432 | if (xp->cur >= xp->number) |
426 | tmp->delta = delta; | 433 | return -EINVAL; |
427 | 434 | ||
428 | if (xt[af].compat_offsets) { | 435 | if (xp->cur) |
429 | tmp->next = xt[af].compat_offsets->next; | 436 | delta += xp->compat_tab[xp->cur - 1].delta; |
430 | xt[af].compat_offsets->next = tmp; | 437 | xp->compat_tab[xp->cur].offset = offset; |
431 | } else { | 438 | xp->compat_tab[xp->cur].delta = delta; |
432 | xt[af].compat_offsets = tmp; | 439 | xp->cur++; |
433 | tmp->next = NULL; | ||
434 | } | ||
435 | return 0; | 440 | return 0; |
436 | } | 441 | } |
437 | EXPORT_SYMBOL_GPL(xt_compat_add_offset); | 442 | EXPORT_SYMBOL_GPL(xt_compat_add_offset); |
438 | 443 | ||
439 | void xt_compat_flush_offsets(u_int8_t af) | 444 | void xt_compat_flush_offsets(u_int8_t af) |
440 | { | 445 | { |
441 | struct compat_delta *tmp, *next; | 446 | if (xt[af].compat_tab) { |
442 | 447 | vfree(xt[af].compat_tab); | |
443 | if (xt[af].compat_offsets) { | 448 | xt[af].compat_tab = NULL; |
444 | for (tmp = xt[af].compat_offsets; tmp; tmp = next) { | 449 | xt[af].number = 0; |
445 | next = tmp->next; | ||
446 | kfree(tmp); | ||
447 | } | ||
448 | xt[af].compat_offsets = NULL; | ||
449 | } | 450 | } |
450 | } | 451 | } |
451 | EXPORT_SYMBOL_GPL(xt_compat_flush_offsets); | 452 | EXPORT_SYMBOL_GPL(xt_compat_flush_offsets); |
452 | 453 | ||
453 | int xt_compat_calc_jump(u_int8_t af, unsigned int offset) | 454 | int xt_compat_calc_jump(u_int8_t af, unsigned int offset) |
454 | { | 455 | { |
455 | struct compat_delta *tmp; | 456 | struct compat_delta *tmp = xt[af].compat_tab; |
456 | int delta; | 457 | int mid, left = 0, right = xt[af].cur - 1; |
457 | 458 | ||
458 | for (tmp = xt[af].compat_offsets, delta = 0; tmp; tmp = tmp->next) | 459 | while (left <= right) { |
459 | if (tmp->offset < offset) | 460 | mid = (left + right) >> 1; |
460 | delta += tmp->delta; | 461 | if (offset > tmp[mid].offset) |
461 | return delta; | 462 | left = mid + 1; |
463 | else if (offset < tmp[mid].offset) | ||
464 | right = mid - 1; | ||
465 | else | ||
466 | return mid ? tmp[mid - 1].delta : 0; | ||
467 | } | ||
468 | WARN_ON_ONCE(1); | ||
469 | return 0; | ||
462 | } | 470 | } |
463 | EXPORT_SYMBOL_GPL(xt_compat_calc_jump); | 471 | EXPORT_SYMBOL_GPL(xt_compat_calc_jump); |
464 | 472 | ||
473 | void xt_compat_init_offsets(u_int8_t af, unsigned int number) | ||
474 | { | ||
475 | xt[af].number = number; | ||
476 | xt[af].cur = 0; | ||
477 | } | ||
478 | EXPORT_SYMBOL(xt_compat_init_offsets); | ||
479 | |||
465 | int xt_compat_match_offset(const struct xt_match *match) | 480 | int xt_compat_match_offset(const struct xt_match *match) |
466 | { | 481 | { |
467 | u_int16_t csize = match->compatsize ? : match->matchsize; | 482 | u_int16_t csize = match->compatsize ? : match->matchsize; |
@@ -820,6 +835,21 @@ xt_replace_table(struct xt_table *table, | |||
820 | */ | 835 | */ |
821 | local_bh_enable(); | 836 | local_bh_enable(); |
822 | 837 | ||
838 | #ifdef CONFIG_AUDIT | ||
839 | if (audit_enabled) { | ||
840 | struct audit_buffer *ab; | ||
841 | |||
842 | ab = audit_log_start(current->audit_context, GFP_KERNEL, | ||
843 | AUDIT_NETFILTER_CFG); | ||
844 | if (ab) { | ||
845 | audit_log_format(ab, "table=%s family=%u entries=%u", | ||
846 | table->name, table->af, | ||
847 | private->number); | ||
848 | audit_log_end(ab); | ||
849 | } | ||
850 | } | ||
851 | #endif | ||
852 | |||
823 | return private; | 853 | return private; |
824 | } | 854 | } |
825 | EXPORT_SYMBOL_GPL(xt_replace_table); | 855 | EXPORT_SYMBOL_GPL(xt_replace_table); |
@@ -1338,7 +1368,7 @@ static int __init xt_init(void) | |||
1338 | mutex_init(&xt[i].mutex); | 1368 | mutex_init(&xt[i].mutex); |
1339 | #ifdef CONFIG_COMPAT | 1369 | #ifdef CONFIG_COMPAT |
1340 | mutex_init(&xt[i].compat_mutex); | 1370 | mutex_init(&xt[i].compat_mutex); |
1341 | xt[i].compat_offsets = NULL; | 1371 | xt[i].compat_tab = NULL; |
1342 | #endif | 1372 | #endif |
1343 | INIT_LIST_HEAD(&xt[i].target); | 1373 | INIT_LIST_HEAD(&xt[i].target); |
1344 | INIT_LIST_HEAD(&xt[i].match); | 1374 | INIT_LIST_HEAD(&xt[i].match); |
diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c new file mode 100644 index 000000000000..81802d27346e --- /dev/null +++ b/net/netfilter/xt_AUDIT.c | |||
@@ -0,0 +1,204 @@ | |||
1 | /* | ||
2 | * Creates audit record for dropped/accepted packets | ||
3 | * | ||
4 | * (C) 2010-2011 Thomas Graf <tgraf@redhat.com> | ||
5 | * (C) 2010-2011 Red Hat, Inc. | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | */ | ||
11 | |||
12 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
13 | |||
14 | #include <linux/audit.h> | ||
15 | #include <linux/module.h> | ||
16 | #include <linux/skbuff.h> | ||
17 | #include <linux/tcp.h> | ||
18 | #include <linux/udp.h> | ||
19 | #include <linux/if_arp.h> | ||
20 | #include <linux/netfilter/x_tables.h> | ||
21 | #include <linux/netfilter/xt_AUDIT.h> | ||
22 | #include <net/ipv6.h> | ||
23 | #include <net/ip.h> | ||
24 | |||
25 | MODULE_LICENSE("GPL"); | ||
26 | MODULE_AUTHOR("Thomas Graf <tgraf@redhat.com>"); | ||
27 | MODULE_DESCRIPTION("Xtables: creates audit records for dropped/accepted packets"); | ||
28 | MODULE_ALIAS("ipt_AUDIT"); | ||
29 | MODULE_ALIAS("ip6t_AUDIT"); | ||
30 | MODULE_ALIAS("ebt_AUDIT"); | ||
31 | MODULE_ALIAS("arpt_AUDIT"); | ||
32 | |||
33 | static void audit_proto(struct audit_buffer *ab, struct sk_buff *skb, | ||
34 | unsigned int proto, unsigned int offset) | ||
35 | { | ||
36 | switch (proto) { | ||
37 | case IPPROTO_TCP: | ||
38 | case IPPROTO_UDP: | ||
39 | case IPPROTO_UDPLITE: { | ||
40 | const __be16 *pptr; | ||
41 | __be16 _ports[2]; | ||
42 | |||
43 | pptr = skb_header_pointer(skb, offset, sizeof(_ports), _ports); | ||
44 | if (pptr == NULL) { | ||
45 | audit_log_format(ab, " truncated=1"); | ||
46 | return; | ||
47 | } | ||
48 | |||
49 | audit_log_format(ab, " sport=%hu dport=%hu", | ||
50 | ntohs(pptr[0]), ntohs(pptr[1])); | ||
51 | } | ||
52 | break; | ||
53 | |||
54 | case IPPROTO_ICMP: | ||
55 | case IPPROTO_ICMPV6: { | ||
56 | const u8 *iptr; | ||
57 | u8 _ih[2]; | ||
58 | |||
59 | iptr = skb_header_pointer(skb, offset, sizeof(_ih), &_ih); | ||
60 | if (iptr == NULL) { | ||
61 | audit_log_format(ab, " truncated=1"); | ||
62 | return; | ||
63 | } | ||
64 | |||
65 | audit_log_format(ab, " icmptype=%hhu icmpcode=%hhu", | ||
66 | iptr[0], iptr[1]); | ||
67 | |||
68 | } | ||
69 | break; | ||
70 | } | ||
71 | } | ||
72 | |||
73 | static void audit_ip4(struct audit_buffer *ab, struct sk_buff *skb) | ||
74 | { | ||
75 | struct iphdr _iph; | ||
76 | const struct iphdr *ih; | ||
77 | |||
78 | ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); | ||
79 | if (!ih) { | ||
80 | audit_log_format(ab, " truncated=1"); | ||
81 | return; | ||
82 | } | ||
83 | |||
84 | audit_log_format(ab, " saddr=%pI4 daddr=%pI4 ipid=%hu proto=%hhu", | ||
85 | &ih->saddr, &ih->daddr, ntohs(ih->id), ih->protocol); | ||
86 | |||
87 | if (ntohs(ih->frag_off) & IP_OFFSET) { | ||
88 | audit_log_format(ab, " frag=1"); | ||
89 | return; | ||
90 | } | ||
91 | |||
92 | audit_proto(ab, skb, ih->protocol, ih->ihl * 4); | ||
93 | } | ||
94 | |||
95 | static void audit_ip6(struct audit_buffer *ab, struct sk_buff *skb) | ||
96 | { | ||
97 | struct ipv6hdr _ip6h; | ||
98 | const struct ipv6hdr *ih; | ||
99 | u8 nexthdr; | ||
100 | int offset; | ||
101 | |||
102 | ih = skb_header_pointer(skb, skb_network_offset(skb), sizeof(_ip6h), &_ip6h); | ||
103 | if (!ih) { | ||
104 | audit_log_format(ab, " truncated=1"); | ||
105 | return; | ||
106 | } | ||
107 | |||
108 | nexthdr = ih->nexthdr; | ||
109 | offset = ipv6_skip_exthdr(skb, skb_network_offset(skb) + sizeof(_ip6h), | ||
110 | &nexthdr); | ||
111 | |||
112 | audit_log_format(ab, " saddr=%pI6c daddr=%pI6c proto=%hhu", | ||
113 | &ih->saddr, &ih->daddr, nexthdr); | ||
114 | |||
115 | if (offset) | ||
116 | audit_proto(ab, skb, nexthdr, offset); | ||
117 | } | ||
118 | |||
119 | static unsigned int | ||
120 | audit_tg(struct sk_buff *skb, const struct xt_action_param *par) | ||
121 | { | ||
122 | const struct xt_audit_info *info = par->targinfo; | ||
123 | struct audit_buffer *ab; | ||
124 | |||
125 | ab = audit_log_start(NULL, GFP_ATOMIC, AUDIT_NETFILTER_PKT); | ||
126 | if (ab == NULL) | ||
127 | goto errout; | ||
128 | |||
129 | audit_log_format(ab, "action=%hhu hook=%u len=%u inif=%s outif=%s", | ||
130 | info->type, par->hooknum, skb->len, | ||
131 | par->in ? par->in->name : "?", | ||
132 | par->out ? par->out->name : "?"); | ||
133 | |||
134 | if (skb->mark) | ||
135 | audit_log_format(ab, " mark=%#x", skb->mark); | ||
136 | |||
137 | if (skb->dev && skb->dev->type == ARPHRD_ETHER) { | ||
138 | audit_log_format(ab, " smac=%pM dmac=%pM macproto=0x%04x", | ||
139 | eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, | ||
140 | ntohs(eth_hdr(skb)->h_proto)); | ||
141 | |||
142 | if (par->family == NFPROTO_BRIDGE) { | ||
143 | switch (eth_hdr(skb)->h_proto) { | ||
144 | case __constant_htons(ETH_P_IP): | ||
145 | audit_ip4(ab, skb); | ||
146 | break; | ||
147 | |||
148 | case __constant_htons(ETH_P_IPV6): | ||
149 | audit_ip6(ab, skb); | ||
150 | break; | ||
151 | } | ||
152 | } | ||
153 | } | ||
154 | |||
155 | switch (par->family) { | ||
156 | case NFPROTO_IPV4: | ||
157 | audit_ip4(ab, skb); | ||
158 | break; | ||
159 | |||
160 | case NFPROTO_IPV6: | ||
161 | audit_ip6(ab, skb); | ||
162 | break; | ||
163 | } | ||
164 | |||
165 | audit_log_end(ab); | ||
166 | |||
167 | errout: | ||
168 | return XT_CONTINUE; | ||
169 | } | ||
170 | |||
171 | static int audit_tg_check(const struct xt_tgchk_param *par) | ||
172 | { | ||
173 | const struct xt_audit_info *info = par->targinfo; | ||
174 | |||
175 | if (info->type > XT_AUDIT_TYPE_MAX) { | ||
176 | pr_info("Audit type out of range (valid range: 0..%hhu)\n", | ||
177 | XT_AUDIT_TYPE_MAX); | ||
178 | return -ERANGE; | ||
179 | } | ||
180 | |||
181 | return 0; | ||
182 | } | ||
183 | |||
184 | static struct xt_target audit_tg_reg __read_mostly = { | ||
185 | .name = "AUDIT", | ||
186 | .family = NFPROTO_UNSPEC, | ||
187 | .target = audit_tg, | ||
188 | .targetsize = sizeof(struct xt_audit_info), | ||
189 | .checkentry = audit_tg_check, | ||
190 | .me = THIS_MODULE, | ||
191 | }; | ||
192 | |||
193 | static int __init audit_tg_init(void) | ||
194 | { | ||
195 | return xt_register_target(&audit_tg_reg); | ||
196 | } | ||
197 | |||
198 | static void __exit audit_tg_exit(void) | ||
199 | { | ||
200 | xt_unregister_target(&audit_tg_reg); | ||
201 | } | ||
202 | |||
203 | module_init(audit_tg_init); | ||
204 | module_exit(audit_tg_exit); | ||
diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c index c2c0e4abeb99..af9c4dadf816 100644 --- a/net/netfilter/xt_CLASSIFY.c +++ b/net/netfilter/xt_CLASSIFY.c | |||
@@ -19,12 +19,14 @@ | |||
19 | #include <linux/netfilter_ipv6.h> | 19 | #include <linux/netfilter_ipv6.h> |
20 | #include <linux/netfilter/x_tables.h> | 20 | #include <linux/netfilter/x_tables.h> |
21 | #include <linux/netfilter/xt_CLASSIFY.h> | 21 | #include <linux/netfilter/xt_CLASSIFY.h> |
22 | #include <linux/netfilter_arp.h> | ||
22 | 23 | ||
23 | MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); | 24 | MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); |
24 | MODULE_LICENSE("GPL"); | 25 | MODULE_LICENSE("GPL"); |
25 | MODULE_DESCRIPTION("Xtables: Qdisc classification"); | 26 | MODULE_DESCRIPTION("Xtables: Qdisc classification"); |
26 | MODULE_ALIAS("ipt_CLASSIFY"); | 27 | MODULE_ALIAS("ipt_CLASSIFY"); |
27 | MODULE_ALIAS("ip6t_CLASSIFY"); | 28 | MODULE_ALIAS("ip6t_CLASSIFY"); |
29 | MODULE_ALIAS("arpt_CLASSIFY"); | ||
28 | 30 | ||
29 | static unsigned int | 31 | static unsigned int |
30 | classify_tg(struct sk_buff *skb, const struct xt_action_param *par) | 32 | classify_tg(struct sk_buff *skb, const struct xt_action_param *par) |
@@ -35,26 +37,36 @@ classify_tg(struct sk_buff *skb, const struct xt_action_param *par) | |||
35 | return XT_CONTINUE; | 37 | return XT_CONTINUE; |
36 | } | 38 | } |
37 | 39 | ||
38 | static struct xt_target classify_tg_reg __read_mostly = { | 40 | static struct xt_target classify_tg_reg[] __read_mostly = { |
39 | .name = "CLASSIFY", | 41 | { |
40 | .revision = 0, | 42 | .name = "CLASSIFY", |
41 | .family = NFPROTO_UNSPEC, | 43 | .revision = 0, |
42 | .table = "mangle", | 44 | .family = NFPROTO_UNSPEC, |
43 | .hooks = (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) | | 45 | .hooks = (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) | |
44 | (1 << NF_INET_POST_ROUTING), | 46 | (1 << NF_INET_POST_ROUTING), |
45 | .target = classify_tg, | 47 | .target = classify_tg, |
46 | .targetsize = sizeof(struct xt_classify_target_info), | 48 | .targetsize = sizeof(struct xt_classify_target_info), |
47 | .me = THIS_MODULE, | 49 | .me = THIS_MODULE, |
50 | }, | ||
51 | { | ||
52 | .name = "CLASSIFY", | ||
53 | .revision = 0, | ||
54 | .family = NFPROTO_ARP, | ||
55 | .hooks = (1 << NF_ARP_OUT) | (1 << NF_ARP_FORWARD), | ||
56 | .target = classify_tg, | ||
57 | .targetsize = sizeof(struct xt_classify_target_info), | ||
58 | .me = THIS_MODULE, | ||
59 | }, | ||
48 | }; | 60 | }; |
49 | 61 | ||
50 | static int __init classify_tg_init(void) | 62 | static int __init classify_tg_init(void) |
51 | { | 63 | { |
52 | return xt_register_target(&classify_tg_reg); | 64 | return xt_register_targets(classify_tg_reg, ARRAY_SIZE(classify_tg_reg)); |
53 | } | 65 | } |
54 | 66 | ||
55 | static void __exit classify_tg_exit(void) | 67 | static void __exit classify_tg_exit(void) |
56 | { | 68 | { |
57 | xt_unregister_target(&classify_tg_reg); | 69 | xt_unregister_targets(classify_tg_reg, ARRAY_SIZE(classify_tg_reg)); |
58 | } | 70 | } |
59 | 71 | ||
60 | module_init(classify_tg_init); | 72 | module_init(classify_tg_init); |
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index be1f22e13545..3bdd443aaf15 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c | |||
@@ -313,3 +313,5 @@ MODULE_AUTHOR("Timo Teras <ext-timo.teras@nokia.com>"); | |||
313 | MODULE_AUTHOR("Luciano Coelho <luciano.coelho@nokia.com>"); | 313 | MODULE_AUTHOR("Luciano Coelho <luciano.coelho@nokia.com>"); |
314 | MODULE_DESCRIPTION("Xtables: idle time monitor"); | 314 | MODULE_DESCRIPTION("Xtables: idle time monitor"); |
315 | MODULE_LICENSE("GPL v2"); | 315 | MODULE_LICENSE("GPL v2"); |
316 | MODULE_ALIAS("ipt_IDLETIMER"); | ||
317 | MODULE_ALIAS("ip6t_IDLETIMER"); | ||
diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c index a4140509eea1..993de2ba89d3 100644 --- a/net/netfilter/xt_LED.c +++ b/net/netfilter/xt_LED.c | |||
@@ -31,6 +31,8 @@ | |||
31 | MODULE_LICENSE("GPL"); | 31 | MODULE_LICENSE("GPL"); |
32 | MODULE_AUTHOR("Adam Nielsen <a.nielsen@shikadi.net>"); | 32 | MODULE_AUTHOR("Adam Nielsen <a.nielsen@shikadi.net>"); |
33 | MODULE_DESCRIPTION("Xtables: trigger LED devices on packet match"); | 33 | MODULE_DESCRIPTION("Xtables: trigger LED devices on packet match"); |
34 | MODULE_ALIAS("ipt_LED"); | ||
35 | MODULE_ALIAS("ip6t_LED"); | ||
34 | 36 | ||
35 | static LIST_HEAD(xt_led_triggers); | 37 | static LIST_HEAD(xt_led_triggers); |
36 | static DEFINE_MUTEX(xt_led_mutex); | 38 | static DEFINE_MUTEX(xt_led_mutex); |
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index 039cce1bde3d..d4f4b5d66b20 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c | |||
@@ -72,18 +72,31 @@ nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par) | |||
72 | 72 | ||
73 | if (info->queues_total > 1) { | 73 | if (info->queues_total > 1) { |
74 | if (par->family == NFPROTO_IPV4) | 74 | if (par->family == NFPROTO_IPV4) |
75 | queue = hash_v4(skb) % info->queues_total + queue; | 75 | queue = (((u64) hash_v4(skb) * info->queues_total) >> |
76 | 32) + queue; | ||
76 | #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) | 77 | #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) |
77 | else if (par->family == NFPROTO_IPV6) | 78 | else if (par->family == NFPROTO_IPV6) |
78 | queue = hash_v6(skb) % info->queues_total + queue; | 79 | queue = (((u64) hash_v6(skb) * info->queues_total) >> |
80 | 32) + queue; | ||
79 | #endif | 81 | #endif |
80 | } | 82 | } |
81 | return NF_QUEUE_NR(queue); | 83 | return NF_QUEUE_NR(queue); |
82 | } | 84 | } |
83 | 85 | ||
84 | static int nfqueue_tg_v1_check(const struct xt_tgchk_param *par) | 86 | static unsigned int |
87 | nfqueue_tg_v2(struct sk_buff *skb, const struct xt_action_param *par) | ||
85 | { | 88 | { |
86 | const struct xt_NFQ_info_v1 *info = par->targinfo; | 89 | const struct xt_NFQ_info_v2 *info = par->targinfo; |
90 | unsigned int ret = nfqueue_tg_v1(skb, par); | ||
91 | |||
92 | if (info->bypass) | ||
93 | ret |= NF_VERDICT_FLAG_QUEUE_BYPASS; | ||
94 | return ret; | ||
95 | } | ||
96 | |||
97 | static int nfqueue_tg_check(const struct xt_tgchk_param *par) | ||
98 | { | ||
99 | const struct xt_NFQ_info_v2 *info = par->targinfo; | ||
87 | u32 maxid; | 100 | u32 maxid; |
88 | 101 | ||
89 | if (unlikely(!rnd_inited)) { | 102 | if (unlikely(!rnd_inited)) { |
@@ -100,6 +113,8 @@ static int nfqueue_tg_v1_check(const struct xt_tgchk_param *par) | |||
100 | info->queues_total, maxid); | 113 | info->queues_total, maxid); |
101 | return -ERANGE; | 114 | return -ERANGE; |
102 | } | 115 | } |
116 | if (par->target->revision == 2 && info->bypass > 1) | ||
117 | return -EINVAL; | ||
103 | return 0; | 118 | return 0; |
104 | } | 119 | } |
105 | 120 | ||
@@ -115,11 +130,20 @@ static struct xt_target nfqueue_tg_reg[] __read_mostly = { | |||
115 | .name = "NFQUEUE", | 130 | .name = "NFQUEUE", |
116 | .revision = 1, | 131 | .revision = 1, |
117 | .family = NFPROTO_UNSPEC, | 132 | .family = NFPROTO_UNSPEC, |
118 | .checkentry = nfqueue_tg_v1_check, | 133 | .checkentry = nfqueue_tg_check, |
119 | .target = nfqueue_tg_v1, | 134 | .target = nfqueue_tg_v1, |
120 | .targetsize = sizeof(struct xt_NFQ_info_v1), | 135 | .targetsize = sizeof(struct xt_NFQ_info_v1), |
121 | .me = THIS_MODULE, | 136 | .me = THIS_MODULE, |
122 | }, | 137 | }, |
138 | { | ||
139 | .name = "NFQUEUE", | ||
140 | .revision = 2, | ||
141 | .family = NFPROTO_UNSPEC, | ||
142 | .checkentry = nfqueue_tg_check, | ||
143 | .target = nfqueue_tg_v2, | ||
144 | .targetsize = sizeof(struct xt_NFQ_info_v2), | ||
145 | .me = THIS_MODULE, | ||
146 | }, | ||
123 | }; | 147 | }; |
124 | 148 | ||
125 | static int __init nfqueue_tg_init(void) | 149 | static int __init nfqueue_tg_init(void) |
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 5c5b6b921b84..7fd3fd51f274 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c | |||
@@ -193,10 +193,12 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) | |||
193 | 193 | ||
194 | if (par->family == NFPROTO_IPV6) { | 194 | if (par->family == NFPROTO_IPV6) { |
195 | const struct ipv6hdr *iph = ipv6_hdr(skb); | 195 | const struct ipv6hdr *iph = ipv6_hdr(skb); |
196 | memcpy(&addr.ip6, &iph->saddr, sizeof(iph->saddr)); | 196 | memcpy(&addr.ip6, (info->flags & XT_CONNLIMIT_DADDR) ? |
197 | &iph->daddr : &iph->saddr, sizeof(addr.ip6)); | ||
197 | } else { | 198 | } else { |
198 | const struct iphdr *iph = ip_hdr(skb); | 199 | const struct iphdr *iph = ip_hdr(skb); |
199 | addr.ip = iph->saddr; | 200 | addr.ip = (info->flags & XT_CONNLIMIT_DADDR) ? |
201 | iph->daddr : iph->saddr; | ||
200 | } | 202 | } |
201 | 203 | ||
202 | spin_lock_bh(&info->data->lock); | 204 | spin_lock_bh(&info->data->lock); |
@@ -204,13 +206,12 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) | |||
204 | &info->mask, par->family); | 206 | &info->mask, par->family); |
205 | spin_unlock_bh(&info->data->lock); | 207 | spin_unlock_bh(&info->data->lock); |
206 | 208 | ||
207 | if (connections < 0) { | 209 | if (connections < 0) |
208 | /* kmalloc failed, drop it entirely */ | 210 | /* kmalloc failed, drop it entirely */ |
209 | par->hotdrop = true; | 211 | goto hotdrop; |
210 | return false; | ||
211 | } | ||
212 | 212 | ||
213 | return (connections > info->limit) ^ info->inverse; | 213 | return (connections > info->limit) ^ |
214 | !!(info->flags & XT_CONNLIMIT_INVERT); | ||
214 | 215 | ||
215 | hotdrop: | 216 | hotdrop: |
216 | par->hotdrop = true; | 217 | par->hotdrop = true; |
@@ -268,25 +269,38 @@ static void connlimit_mt_destroy(const struct xt_mtdtor_param *par) | |||
268 | kfree(info->data); | 269 | kfree(info->data); |
269 | } | 270 | } |
270 | 271 | ||
271 | static struct xt_match connlimit_mt_reg __read_mostly = { | 272 | static struct xt_match connlimit_mt_reg[] __read_mostly = { |
272 | .name = "connlimit", | 273 | { |
273 | .revision = 0, | 274 | .name = "connlimit", |
274 | .family = NFPROTO_UNSPEC, | 275 | .revision = 0, |
275 | .checkentry = connlimit_mt_check, | 276 | .family = NFPROTO_UNSPEC, |
276 | .match = connlimit_mt, | 277 | .checkentry = connlimit_mt_check, |
277 | .matchsize = sizeof(struct xt_connlimit_info), | 278 | .match = connlimit_mt, |
278 | .destroy = connlimit_mt_destroy, | 279 | .matchsize = sizeof(struct xt_connlimit_info), |
279 | .me = THIS_MODULE, | 280 | .destroy = connlimit_mt_destroy, |
281 | .me = THIS_MODULE, | ||
282 | }, | ||
283 | { | ||
284 | .name = "connlimit", | ||
285 | .revision = 1, | ||
286 | .family = NFPROTO_UNSPEC, | ||
287 | .checkentry = connlimit_mt_check, | ||
288 | .match = connlimit_mt, | ||
289 | .matchsize = sizeof(struct xt_connlimit_info), | ||
290 | .destroy = connlimit_mt_destroy, | ||
291 | .me = THIS_MODULE, | ||
292 | }, | ||
280 | }; | 293 | }; |
281 | 294 | ||
282 | static int __init connlimit_mt_init(void) | 295 | static int __init connlimit_mt_init(void) |
283 | { | 296 | { |
284 | return xt_register_match(&connlimit_mt_reg); | 297 | return xt_register_matches(connlimit_mt_reg, |
298 | ARRAY_SIZE(connlimit_mt_reg)); | ||
285 | } | 299 | } |
286 | 300 | ||
287 | static void __exit connlimit_mt_exit(void) | 301 | static void __exit connlimit_mt_exit(void) |
288 | { | 302 | { |
289 | xt_unregister_match(&connlimit_mt_reg); | 303 | xt_unregister_matches(connlimit_mt_reg, ARRAY_SIZE(connlimit_mt_reg)); |
290 | } | 304 | } |
291 | 305 | ||
292 | module_init(connlimit_mt_init); | 306 | module_init(connlimit_mt_init); |
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index e536710ad916..4ef1b63ad73f 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c | |||
@@ -112,6 +112,54 @@ ct_proto_port_check(const struct xt_conntrack_mtinfo2 *info, | |||
112 | return true; | 112 | return true; |
113 | } | 113 | } |
114 | 114 | ||
115 | static inline bool | ||
116 | port_match(u16 min, u16 max, u16 port, bool invert) | ||
117 | { | ||
118 | return (port >= min && port <= max) ^ invert; | ||
119 | } | ||
120 | |||
121 | static inline bool | ||
122 | ct_proto_port_check_v3(const struct xt_conntrack_mtinfo3 *info, | ||
123 | const struct nf_conn *ct) | ||
124 | { | ||
125 | const struct nf_conntrack_tuple *tuple; | ||
126 | |||
127 | tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; | ||
128 | if ((info->match_flags & XT_CONNTRACK_PROTO) && | ||
129 | (nf_ct_protonum(ct) == info->l4proto) ^ | ||
130 | !(info->invert_flags & XT_CONNTRACK_PROTO)) | ||
131 | return false; | ||
132 | |||
133 | /* Shortcut to match all recognized protocols by using ->src.all. */ | ||
134 | if ((info->match_flags & XT_CONNTRACK_ORIGSRC_PORT) && | ||
135 | !port_match(info->origsrc_port, info->origsrc_port_high, | ||
136 | ntohs(tuple->src.u.all), | ||
137 | info->invert_flags & XT_CONNTRACK_ORIGSRC_PORT)) | ||
138 | return false; | ||
139 | |||
140 | if ((info->match_flags & XT_CONNTRACK_ORIGDST_PORT) && | ||
141 | !port_match(info->origdst_port, info->origdst_port_high, | ||
142 | ntohs(tuple->dst.u.all), | ||
143 | info->invert_flags & XT_CONNTRACK_ORIGDST_PORT)) | ||
144 | return false; | ||
145 | |||
146 | tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; | ||
147 | |||
148 | if ((info->match_flags & XT_CONNTRACK_REPLSRC_PORT) && | ||
149 | !port_match(info->replsrc_port, info->replsrc_port_high, | ||
150 | ntohs(tuple->src.u.all), | ||
151 | info->invert_flags & XT_CONNTRACK_REPLSRC_PORT)) | ||
152 | return false; | ||
153 | |||
154 | if ((info->match_flags & XT_CONNTRACK_REPLDST_PORT) && | ||
155 | !port_match(info->repldst_port, info->repldst_port_high, | ||
156 | ntohs(tuple->dst.u.all), | ||
157 | info->invert_flags & XT_CONNTRACK_REPLDST_PORT)) | ||
158 | return false; | ||
159 | |||
160 | return true; | ||
161 | } | ||
162 | |||
115 | static bool | 163 | static bool |
116 | conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par, | 164 | conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par, |
117 | u16 state_mask, u16 status_mask) | 165 | u16 state_mask, u16 status_mask) |
@@ -170,8 +218,13 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par, | |||
170 | !(info->invert_flags & XT_CONNTRACK_REPLDST)) | 218 | !(info->invert_flags & XT_CONNTRACK_REPLDST)) |
171 | return false; | 219 | return false; |
172 | 220 | ||
173 | if (!ct_proto_port_check(info, ct)) | 221 | if (par->match->revision != 3) { |
174 | return false; | 222 | if (!ct_proto_port_check(info, ct)) |
223 | return false; | ||
224 | } else { | ||
225 | if (!ct_proto_port_check_v3(par->matchinfo, ct)) | ||
226 | return false; | ||
227 | } | ||
175 | 228 | ||
176 | if ((info->match_flags & XT_CONNTRACK_STATUS) && | 229 | if ((info->match_flags & XT_CONNTRACK_STATUS) && |
177 | (!!(status_mask & ct->status) ^ | 230 | (!!(status_mask & ct->status) ^ |
@@ -207,6 +260,14 @@ conntrack_mt_v2(const struct sk_buff *skb, struct xt_action_param *par) | |||
207 | return conntrack_mt(skb, par, info->state_mask, info->status_mask); | 260 | return conntrack_mt(skb, par, info->state_mask, info->status_mask); |
208 | } | 261 | } |
209 | 262 | ||
263 | static bool | ||
264 | conntrack_mt_v3(const struct sk_buff *skb, struct xt_action_param *par) | ||
265 | { | ||
266 | const struct xt_conntrack_mtinfo3 *info = par->matchinfo; | ||
267 | |||
268 | return conntrack_mt(skb, par, info->state_mask, info->status_mask); | ||
269 | } | ||
270 | |||
210 | static int conntrack_mt_check(const struct xt_mtchk_param *par) | 271 | static int conntrack_mt_check(const struct xt_mtchk_param *par) |
211 | { | 272 | { |
212 | int ret; | 273 | int ret; |
@@ -244,6 +305,16 @@ static struct xt_match conntrack_mt_reg[] __read_mostly = { | |||
244 | .destroy = conntrack_mt_destroy, | 305 | .destroy = conntrack_mt_destroy, |
245 | .me = THIS_MODULE, | 306 | .me = THIS_MODULE, |
246 | }, | 307 | }, |
308 | { | ||
309 | .name = "conntrack", | ||
310 | .revision = 3, | ||
311 | .family = NFPROTO_UNSPEC, | ||
312 | .matchsize = sizeof(struct xt_conntrack_mtinfo3), | ||
313 | .match = conntrack_mt_v3, | ||
314 | .checkentry = conntrack_mt_check, | ||
315 | .destroy = conntrack_mt_destroy, | ||
316 | .me = THIS_MODULE, | ||
317 | }, | ||
247 | }; | 318 | }; |
248 | 319 | ||
249 | static int __init conntrack_mt_init(void) | 320 | static int __init conntrack_mt_init(void) |
diff --git a/net/netfilter/xt_cpu.c b/net/netfilter/xt_cpu.c index b39db8a5cbae..c7a2e5466bc4 100644 --- a/net/netfilter/xt_cpu.c +++ b/net/netfilter/xt_cpu.c | |||
@@ -22,6 +22,8 @@ | |||
22 | MODULE_LICENSE("GPL"); | 22 | MODULE_LICENSE("GPL"); |
23 | MODULE_AUTHOR("Eric Dumazet <eric.dumazet@gmail.com>"); | 23 | MODULE_AUTHOR("Eric Dumazet <eric.dumazet@gmail.com>"); |
24 | MODULE_DESCRIPTION("Xtables: CPU match"); | 24 | MODULE_DESCRIPTION("Xtables: CPU match"); |
25 | MODULE_ALIAS("ipt_cpu"); | ||
26 | MODULE_ALIAS("ip6t_cpu"); | ||
25 | 27 | ||
26 | static int cpu_mt_check(const struct xt_mtchk_param *par) | 28 | static int cpu_mt_check(const struct xt_mtchk_param *par) |
27 | { | 29 | { |
diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c index 9127a3d8aa35..bb10b0717f1b 100644 --- a/net/netfilter/xt_ipvs.c +++ b/net/netfilter/xt_ipvs.c | |||
@@ -85,7 +85,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par) | |||
85 | /* | 85 | /* |
86 | * Check if the packet belongs to an existing entry | 86 | * Check if the packet belongs to an existing entry |
87 | */ | 87 | */ |
88 | cp = pp->conn_out_get(family, skb, pp, &iph, iph.len, 1 /* inverse */); | 88 | cp = pp->conn_out_get(family, skb, &iph, iph.len, 1 /* inverse */); |
89 | if (unlikely(cp == NULL)) { | 89 | if (unlikely(cp == NULL)) { |
90 | match = false; | 90 | match = false; |
91 | goto out; | 91 | goto out; |
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 91cb1d71f018..c60649ec1193 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c | |||
@@ -164,7 +164,6 @@ struct packet_mreq_max { | |||
164 | static int packet_set_ring(struct sock *sk, struct tpacket_req *req, | 164 | static int packet_set_ring(struct sock *sk, struct tpacket_req *req, |
165 | int closing, int tx_ring); | 165 | int closing, int tx_ring); |
166 | 166 | ||
167 | #define PGV_FROM_VMALLOC 1 | ||
168 | struct pgv { | 167 | struct pgv { |
169 | char *buffer; | 168 | char *buffer; |
170 | }; | 169 | }; |
@@ -523,11 +522,11 @@ static inline unsigned int run_filter(const struct sk_buff *skb, | |||
523 | { | 522 | { |
524 | struct sk_filter *filter; | 523 | struct sk_filter *filter; |
525 | 524 | ||
526 | rcu_read_lock_bh(); | 525 | rcu_read_lock(); |
527 | filter = rcu_dereference_bh(sk->sk_filter); | 526 | filter = rcu_dereference(sk->sk_filter); |
528 | if (filter != NULL) | 527 | if (filter != NULL) |
529 | res = sk_run_filter(skb, filter->insns); | 528 | res = sk_run_filter(skb, filter->insns); |
530 | rcu_read_unlock_bh(); | 529 | rcu_read_unlock(); |
531 | 530 | ||
532 | return res; | 531 | return res; |
533 | } | 532 | } |
diff --git a/net/rds/rds.h b/net/rds/rds.h index 9542449c0720..da8adac2bf06 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h | |||
@@ -50,7 +50,6 @@ rdsdebug(char *fmt, ...) | |||
50 | #define RDS_FRAG_SIZE ((unsigned int)(1 << RDS_FRAG_SHIFT)) | 50 | #define RDS_FRAG_SIZE ((unsigned int)(1 << RDS_FRAG_SHIFT)) |
51 | 51 | ||
52 | #define RDS_CONG_MAP_BYTES (65536 / 8) | 52 | #define RDS_CONG_MAP_BYTES (65536 / 8) |
53 | #define RDS_CONG_MAP_LONGS (RDS_CONG_MAP_BYTES / sizeof(unsigned long)) | ||
54 | #define RDS_CONG_MAP_PAGES (PAGE_ALIGN(RDS_CONG_MAP_BYTES) / PAGE_SIZE) | 53 | #define RDS_CONG_MAP_PAGES (PAGE_ALIGN(RDS_CONG_MAP_BYTES) / PAGE_SIZE) |
55 | #define RDS_CONG_MAP_PAGE_BITS (PAGE_SIZE * 8) | 54 | #define RDS_CONG_MAP_PAGE_BITS (PAGE_SIZE * 8) |
56 | 55 | ||
diff --git a/net/sched/Kconfig b/net/sched/Kconfig index f04d4a484d53..e318f458713e 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig | |||
@@ -205,6 +205,18 @@ config NET_SCH_DRR | |||
205 | 205 | ||
206 | If unsure, say N. | 206 | If unsure, say N. |
207 | 207 | ||
208 | config NET_SCH_MQPRIO | ||
209 | tristate "Multi-queue priority scheduler (MQPRIO)" | ||
210 | help | ||
211 | Say Y here if you want to use the Multi-queue Priority scheduler. | ||
212 | This scheduler allows QOS to be offloaded on NICs that have support | ||
213 | for offloading QOS schedulers. | ||
214 | |||
215 | To compile this driver as a module, choose M here: the module will | ||
216 | be called sch_mqprio. | ||
217 | |||
218 | If unsure, say N. | ||
219 | |||
208 | config NET_SCH_INGRESS | 220 | config NET_SCH_INGRESS |
209 | tristate "Ingress Qdisc" | 221 | tristate "Ingress Qdisc" |
210 | depends on NET_CLS_ACT | 222 | depends on NET_CLS_ACT |
@@ -243,7 +255,7 @@ config NET_CLS_TCINDEX | |||
243 | 255 | ||
244 | config NET_CLS_ROUTE4 | 256 | config NET_CLS_ROUTE4 |
245 | tristate "Routing decision (ROUTE)" | 257 | tristate "Routing decision (ROUTE)" |
246 | select NET_CLS_ROUTE | 258 | select IP_ROUTE_CLASSID |
247 | select NET_CLS | 259 | select NET_CLS |
248 | ---help--- | 260 | ---help--- |
249 | If you say Y here, you will be able to classify packets | 261 | If you say Y here, you will be able to classify packets |
@@ -252,9 +264,6 @@ config NET_CLS_ROUTE4 | |||
252 | To compile this code as a module, choose M here: the | 264 | To compile this code as a module, choose M here: the |
253 | module will be called cls_route. | 265 | module will be called cls_route. |
254 | 266 | ||
255 | config NET_CLS_ROUTE | ||
256 | bool | ||
257 | |||
258 | config NET_CLS_FW | 267 | config NET_CLS_FW |
259 | tristate "Netfilter mark (FW)" | 268 | tristate "Netfilter mark (FW)" |
260 | select NET_CLS | 269 | select NET_CLS |
diff --git a/net/sched/Makefile b/net/sched/Makefile index 960f5dba6304..26ce681a2c60 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile | |||
@@ -32,6 +32,7 @@ obj-$(CONFIG_NET_SCH_MULTIQ) += sch_multiq.o | |||
32 | obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o | 32 | obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o |
33 | obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o | 33 | obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o |
34 | obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o | 34 | obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o |
35 | obj-$(CONFIG_NET_SCH_MQPRIO) += sch_mqprio.o | ||
35 | obj-$(CONFIG_NET_CLS_U32) += cls_u32.o | 36 | obj-$(CONFIG_NET_CLS_U32) += cls_u32.o |
36 | obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o | 37 | obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o |
37 | obj-$(CONFIG_NET_CLS_FW) += cls_fw.o | 38 | obj-$(CONFIG_NET_CLS_FW) += cls_fw.o |
diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 23b25f89e7e0..15873e14cb54 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c | |||
@@ -78,7 +78,7 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb, | |||
78 | struct tc_action *a, struct tcf_hashinfo *hinfo) | 78 | struct tc_action *a, struct tcf_hashinfo *hinfo) |
79 | { | 79 | { |
80 | struct tcf_common *p; | 80 | struct tcf_common *p; |
81 | int err = 0, index = -1,i = 0, s_i = 0, n_i = 0; | 81 | int err = 0, index = -1, i = 0, s_i = 0, n_i = 0; |
82 | struct nlattr *nest; | 82 | struct nlattr *nest; |
83 | 83 | ||
84 | read_lock_bh(hinfo->lock); | 84 | read_lock_bh(hinfo->lock); |
@@ -126,7 +126,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a, | |||
126 | { | 126 | { |
127 | struct tcf_common *p, *s_p; | 127 | struct tcf_common *p, *s_p; |
128 | struct nlattr *nest; | 128 | struct nlattr *nest; |
129 | int i= 0, n_i = 0; | 129 | int i = 0, n_i = 0; |
130 | 130 | ||
131 | nest = nla_nest_start(skb, a->order); | 131 | nest = nla_nest_start(skb, a->order); |
132 | if (nest == NULL) | 132 | if (nest == NULL) |
@@ -138,7 +138,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a, | |||
138 | while (p != NULL) { | 138 | while (p != NULL) { |
139 | s_p = p->tcfc_next; | 139 | s_p = p->tcfc_next; |
140 | if (ACT_P_DELETED == tcf_hash_release(p, 0, hinfo)) | 140 | if (ACT_P_DELETED == tcf_hash_release(p, 0, hinfo)) |
141 | module_put(a->ops->owner); | 141 | module_put(a->ops->owner); |
142 | n_i++; | 142 | n_i++; |
143 | p = s_p; | 143 | p = s_p; |
144 | } | 144 | } |
@@ -447,7 +447,8 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref) | |||
447 | nest = nla_nest_start(skb, TCA_OPTIONS); | 447 | nest = nla_nest_start(skb, TCA_OPTIONS); |
448 | if (nest == NULL) | 448 | if (nest == NULL) |
449 | goto nla_put_failure; | 449 | goto nla_put_failure; |
450 | if ((err = tcf_action_dump_old(skb, a, bind, ref)) > 0) { | 450 | err = tcf_action_dump_old(skb, a, bind, ref); |
451 | if (err > 0) { | ||
451 | nla_nest_end(skb, nest); | 452 | nla_nest_end(skb, nest); |
452 | return err; | 453 | return err; |
453 | } | 454 | } |
@@ -491,7 +492,7 @@ struct tc_action *tcf_action_init_1(struct nlattr *nla, struct nlattr *est, | |||
491 | struct tc_action *a; | 492 | struct tc_action *a; |
492 | struct tc_action_ops *a_o; | 493 | struct tc_action_ops *a_o; |
493 | char act_name[IFNAMSIZ]; | 494 | char act_name[IFNAMSIZ]; |
494 | struct nlattr *tb[TCA_ACT_MAX+1]; | 495 | struct nlattr *tb[TCA_ACT_MAX + 1]; |
495 | struct nlattr *kind; | 496 | struct nlattr *kind; |
496 | int err; | 497 | int err; |
497 | 498 | ||
@@ -549,9 +550,9 @@ struct tc_action *tcf_action_init_1(struct nlattr *nla, struct nlattr *est, | |||
549 | goto err_free; | 550 | goto err_free; |
550 | 551 | ||
551 | /* module count goes up only when brand new policy is created | 552 | /* module count goes up only when brand new policy is created |
552 | if it exists and is only bound to in a_o->init() then | 553 | * if it exists and is only bound to in a_o->init() then |
553 | ACT_P_CREATED is not returned (a zero is). | 554 | * ACT_P_CREATED is not returned (a zero is). |
554 | */ | 555 | */ |
555 | if (err != ACT_P_CREATED) | 556 | if (err != ACT_P_CREATED) |
556 | module_put(a_o->owner); | 557 | module_put(a_o->owner); |
557 | a->ops = a_o; | 558 | a->ops = a_o; |
@@ -569,7 +570,7 @@ err_out: | |||
569 | struct tc_action *tcf_action_init(struct nlattr *nla, struct nlattr *est, | 570 | struct tc_action *tcf_action_init(struct nlattr *nla, struct nlattr *est, |
570 | char *name, int ovr, int bind) | 571 | char *name, int ovr, int bind) |
571 | { | 572 | { |
572 | struct nlattr *tb[TCA_ACT_MAX_PRIO+1]; | 573 | struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; |
573 | struct tc_action *head = NULL, *act, *act_prev = NULL; | 574 | struct tc_action *head = NULL, *act, *act_prev = NULL; |
574 | int err; | 575 | int err; |
575 | int i; | 576 | int i; |
@@ -697,7 +698,7 @@ act_get_notify(struct net *net, u32 pid, struct nlmsghdr *n, | |||
697 | static struct tc_action * | 698 | static struct tc_action * |
698 | tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 pid) | 699 | tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 pid) |
699 | { | 700 | { |
700 | struct nlattr *tb[TCA_ACT_MAX+1]; | 701 | struct nlattr *tb[TCA_ACT_MAX + 1]; |
701 | struct tc_action *a; | 702 | struct tc_action *a; |
702 | int index; | 703 | int index; |
703 | int err; | 704 | int err; |
@@ -770,7 +771,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, | |||
770 | struct tcamsg *t; | 771 | struct tcamsg *t; |
771 | struct netlink_callback dcb; | 772 | struct netlink_callback dcb; |
772 | struct nlattr *nest; | 773 | struct nlattr *nest; |
773 | struct nlattr *tb[TCA_ACT_MAX+1]; | 774 | struct nlattr *tb[TCA_ACT_MAX + 1]; |
774 | struct nlattr *kind; | 775 | struct nlattr *kind; |
775 | struct tc_action *a = create_a(0); | 776 | struct tc_action *a = create_a(0); |
776 | int err = -ENOMEM; | 777 | int err = -ENOMEM; |
@@ -821,7 +822,8 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, | |||
821 | nlh->nlmsg_flags |= NLM_F_ROOT; | 822 | nlh->nlmsg_flags |= NLM_F_ROOT; |
822 | module_put(a->ops->owner); | 823 | module_put(a->ops->owner); |
823 | kfree(a); | 824 | kfree(a); |
824 | err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); | 825 | err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, |
826 | n->nlmsg_flags & NLM_F_ECHO); | ||
825 | if (err > 0) | 827 | if (err > 0) |
826 | return 0; | 828 | return 0; |
827 | 829 | ||
@@ -842,14 +844,14 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, | |||
842 | u32 pid, int event) | 844 | u32 pid, int event) |
843 | { | 845 | { |
844 | int i, ret; | 846 | int i, ret; |
845 | struct nlattr *tb[TCA_ACT_MAX_PRIO+1]; | 847 | struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; |
846 | struct tc_action *head = NULL, *act, *act_prev = NULL; | 848 | struct tc_action *head = NULL, *act, *act_prev = NULL; |
847 | 849 | ||
848 | ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL); | 850 | ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL); |
849 | if (ret < 0) | 851 | if (ret < 0) |
850 | return ret; | 852 | return ret; |
851 | 853 | ||
852 | if (event == RTM_DELACTION && n->nlmsg_flags&NLM_F_ROOT) { | 854 | if (event == RTM_DELACTION && n->nlmsg_flags & NLM_F_ROOT) { |
853 | if (tb[1] != NULL) | 855 | if (tb[1] != NULL) |
854 | return tca_action_flush(net, tb[1], n, pid); | 856 | return tca_action_flush(net, tb[1], n, pid); |
855 | else | 857 | else |
@@ -892,7 +894,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, | |||
892 | /* now do the delete */ | 894 | /* now do the delete */ |
893 | tcf_action_destroy(head, 0); | 895 | tcf_action_destroy(head, 0); |
894 | ret = rtnetlink_send(skb, net, pid, RTNLGRP_TC, | 896 | ret = rtnetlink_send(skb, net, pid, RTNLGRP_TC, |
895 | n->nlmsg_flags&NLM_F_ECHO); | 897 | n->nlmsg_flags & NLM_F_ECHO); |
896 | if (ret > 0) | 898 | if (ret > 0) |
897 | return 0; | 899 | return 0; |
898 | return ret; | 900 | return ret; |
@@ -936,7 +938,7 @@ static int tcf_add_notify(struct net *net, struct tc_action *a, | |||
936 | nlh->nlmsg_len = skb_tail_pointer(skb) - b; | 938 | nlh->nlmsg_len = skb_tail_pointer(skb) - b; |
937 | NETLINK_CB(skb).dst_group = RTNLGRP_TC; | 939 | NETLINK_CB(skb).dst_group = RTNLGRP_TC; |
938 | 940 | ||
939 | err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, flags&NLM_F_ECHO); | 941 | err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, flags & NLM_F_ECHO); |
940 | if (err > 0) | 942 | if (err > 0) |
941 | err = 0; | 943 | err = 0; |
942 | return err; | 944 | return err; |
@@ -967,7 +969,7 @@ tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n, | |||
967 | 969 | ||
968 | /* dump then free all the actions after update; inserted policy | 970 | /* dump then free all the actions after update; inserted policy |
969 | * stays intact | 971 | * stays intact |
970 | * */ | 972 | */ |
971 | ret = tcf_add_notify(net, act, pid, seq, RTM_NEWACTION, n->nlmsg_flags); | 973 | ret = tcf_add_notify(net, act, pid, seq, RTM_NEWACTION, n->nlmsg_flags); |
972 | for (a = act; a; a = act) { | 974 | for (a = act; a; a = act) { |
973 | act = a->next; | 975 | act = a->next; |
@@ -993,8 +995,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg) | |||
993 | return -EINVAL; | 995 | return -EINVAL; |
994 | } | 996 | } |
995 | 997 | ||
996 | /* n->nlmsg_flags&NLM_F_CREATE | 998 | /* n->nlmsg_flags & NLM_F_CREATE */ |
997 | * */ | ||
998 | switch (n->nlmsg_type) { | 999 | switch (n->nlmsg_type) { |
999 | case RTM_NEWACTION: | 1000 | case RTM_NEWACTION: |
1000 | /* we are going to assume all other flags | 1001 | /* we are going to assume all other flags |
@@ -1003,7 +1004,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg) | |||
1003 | * but since we want avoid ambiguity (eg when flags | 1004 | * but since we want avoid ambiguity (eg when flags |
1004 | * is zero) then just set this | 1005 | * is zero) then just set this |
1005 | */ | 1006 | */ |
1006 | if (n->nlmsg_flags&NLM_F_REPLACE) | 1007 | if (n->nlmsg_flags & NLM_F_REPLACE) |
1007 | ovr = 1; | 1008 | ovr = 1; |
1008 | replay: | 1009 | replay: |
1009 | ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, pid, ovr); | 1010 | ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, pid, ovr); |
@@ -1028,7 +1029,7 @@ replay: | |||
1028 | static struct nlattr * | 1029 | static struct nlattr * |
1029 | find_dump_kind(const struct nlmsghdr *n) | 1030 | find_dump_kind(const struct nlmsghdr *n) |
1030 | { | 1031 | { |
1031 | struct nlattr *tb1, *tb2[TCA_ACT_MAX+1]; | 1032 | struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1]; |
1032 | struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; | 1033 | struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; |
1033 | struct nlattr *nla[TCAA_MAX + 1]; | 1034 | struct nlattr *nla[TCAA_MAX + 1]; |
1034 | struct nlattr *kind; | 1035 | struct nlattr *kind; |
@@ -1071,9 +1072,8 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) | |||
1071 | } | 1072 | } |
1072 | 1073 | ||
1073 | a_o = tc_lookup_action(kind); | 1074 | a_o = tc_lookup_action(kind); |
1074 | if (a_o == NULL) { | 1075 | if (a_o == NULL) |
1075 | return 0; | 1076 | return 0; |
1076 | } | ||
1077 | 1077 | ||
1078 | memset(&a, 0, sizeof(struct tc_action)); | 1078 | memset(&a, 0, sizeof(struct tc_action)); |
1079 | a.ops = a_o; | 1079 | a.ops = a_o; |
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 83ddfc07e45d..6cdf9abe475f 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c | |||
@@ -63,7 +63,7 @@ static int tcf_csum_init(struct nlattr *nla, struct nlattr *est, | |||
63 | if (nla == NULL) | 63 | if (nla == NULL) |
64 | return -EINVAL; | 64 | return -EINVAL; |
65 | 65 | ||
66 | err = nla_parse_nested(tb, TCA_CSUM_MAX, nla,csum_policy); | 66 | err = nla_parse_nested(tb, TCA_CSUM_MAX, nla, csum_policy); |
67 | if (err < 0) | 67 | if (err < 0) |
68 | return err; | 68 | return err; |
69 | 69 | ||
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index c2ed90a4c0b4..2b4ab4b05ce8 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c | |||
@@ -50,7 +50,7 @@ static int gact_determ(struct tcf_gact *gact) | |||
50 | } | 50 | } |
51 | 51 | ||
52 | typedef int (*g_rand)(struct tcf_gact *gact); | 52 | typedef int (*g_rand)(struct tcf_gact *gact); |
53 | static g_rand gact_rand[MAX_RAND]= { NULL, gact_net_rand, gact_determ }; | 53 | static g_rand gact_rand[MAX_RAND] = { NULL, gact_net_rand, gact_determ }; |
54 | #endif /* CONFIG_GACT_PROB */ | 54 | #endif /* CONFIG_GACT_PROB */ |
55 | 55 | ||
56 | static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = { | 56 | static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = { |
@@ -89,7 +89,7 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est, | |||
89 | pc = tcf_hash_create(parm->index, est, a, sizeof(*gact), | 89 | pc = tcf_hash_create(parm->index, est, a, sizeof(*gact), |
90 | bind, &gact_idx_gen, &gact_hash_info); | 90 | bind, &gact_idx_gen, &gact_hash_info); |
91 | if (IS_ERR(pc)) | 91 | if (IS_ERR(pc)) |
92 | return PTR_ERR(pc); | 92 | return PTR_ERR(pc); |
93 | ret = ACT_P_CREATED; | 93 | ret = ACT_P_CREATED; |
94 | } else { | 94 | } else { |
95 | if (!ovr) { | 95 | if (!ovr) { |
@@ -205,9 +205,9 @@ MODULE_LICENSE("GPL"); | |||
205 | static int __init gact_init_module(void) | 205 | static int __init gact_init_module(void) |
206 | { | 206 | { |
207 | #ifdef CONFIG_GACT_PROB | 207 | #ifdef CONFIG_GACT_PROB |
208 | printk(KERN_INFO "GACT probability on\n"); | 208 | pr_info("GACT probability on\n"); |
209 | #else | 209 | #else |
210 | printk(KERN_INFO "GACT probability NOT on\n"); | 210 | pr_info("GACT probability NOT on\n"); |
211 | #endif | 211 | #endif |
212 | return tcf_register_action(&act_gact_ops); | 212 | return tcf_register_action(&act_gact_ops); |
213 | } | 213 | } |
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index c2a7c20e81c1..9fc211a1b20e 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c | |||
@@ -138,7 +138,7 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est, | |||
138 | pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind, | 138 | pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind, |
139 | &ipt_idx_gen, &ipt_hash_info); | 139 | &ipt_idx_gen, &ipt_hash_info); |
140 | if (IS_ERR(pc)) | 140 | if (IS_ERR(pc)) |
141 | return PTR_ERR(pc); | 141 | return PTR_ERR(pc); |
142 | ret = ACT_P_CREATED; | 142 | ret = ACT_P_CREATED; |
143 | } else { | 143 | } else { |
144 | if (!ovr) { | 144 | if (!ovr) { |
@@ -162,7 +162,8 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est, | |||
162 | if (unlikely(!t)) | 162 | if (unlikely(!t)) |
163 | goto err2; | 163 | goto err2; |
164 | 164 | ||
165 | if ((err = ipt_init_target(t, tname, hook)) < 0) | 165 | err = ipt_init_target(t, tname, hook); |
166 | if (err < 0) | ||
166 | goto err3; | 167 | goto err3; |
167 | 168 | ||
168 | spin_lock_bh(&ipt->tcf_lock); | 169 | spin_lock_bh(&ipt->tcf_lock); |
@@ -212,8 +213,9 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a, | |||
212 | bstats_update(&ipt->tcf_bstats, skb); | 213 | bstats_update(&ipt->tcf_bstats, skb); |
213 | 214 | ||
214 | /* yes, we have to worry about both in and out dev | 215 | /* yes, we have to worry about both in and out dev |
215 | worry later - danger - this API seems to have changed | 216 | * worry later - danger - this API seems to have changed |
216 | from earlier kernels */ | 217 | * from earlier kernels |
218 | */ | ||
217 | par.in = skb->dev; | 219 | par.in = skb->dev; |
218 | par.out = NULL; | 220 | par.out = NULL; |
219 | par.hooknum = ipt->tcfi_hook; | 221 | par.hooknum = ipt->tcfi_hook; |
@@ -253,9 +255,9 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int | |||
253 | struct tc_cnt c; | 255 | struct tc_cnt c; |
254 | 256 | ||
255 | /* for simple targets kernel size == user size | 257 | /* for simple targets kernel size == user size |
256 | ** user name = target name | 258 | * user name = target name |
257 | ** for foolproof you need to not assume this | 259 | * for foolproof you need to not assume this |
258 | */ | 260 | */ |
259 | 261 | ||
260 | t = kmemdup(ipt->tcfi_t, ipt->tcfi_t->u.user.target_size, GFP_ATOMIC); | 262 | t = kmemdup(ipt->tcfi_t, ipt->tcfi_t->u.user.target_size, GFP_ATOMIC); |
261 | if (unlikely(!t)) | 263 | if (unlikely(!t)) |
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index d765067e99db..961386e2f2c0 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c | |||
@@ -41,13 +41,13 @@ static struct tcf_hashinfo mirred_hash_info = { | |||
41 | .lock = &mirred_lock, | 41 | .lock = &mirred_lock, |
42 | }; | 42 | }; |
43 | 43 | ||
44 | static inline int tcf_mirred_release(struct tcf_mirred *m, int bind) | 44 | static int tcf_mirred_release(struct tcf_mirred *m, int bind) |
45 | { | 45 | { |
46 | if (m) { | 46 | if (m) { |
47 | if (bind) | 47 | if (bind) |
48 | m->tcf_bindcnt--; | 48 | m->tcf_bindcnt--; |
49 | m->tcf_refcnt--; | 49 | m->tcf_refcnt--; |
50 | if(!m->tcf_bindcnt && m->tcf_refcnt <= 0) { | 50 | if (!m->tcf_bindcnt && m->tcf_refcnt <= 0) { |
51 | list_del(&m->tcfm_list); | 51 | list_del(&m->tcfm_list); |
52 | if (m->tcfm_dev) | 52 | if (m->tcfm_dev) |
53 | dev_put(m->tcfm_dev); | 53 | dev_put(m->tcfm_dev); |
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 178a4bd7b7cb..762b027650a9 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c | |||
@@ -69,7 +69,7 @@ static int tcf_nat_init(struct nlattr *nla, struct nlattr *est, | |||
69 | pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, | 69 | pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, |
70 | &nat_idx_gen, &nat_hash_info); | 70 | &nat_idx_gen, &nat_hash_info); |
71 | if (IS_ERR(pc)) | 71 | if (IS_ERR(pc)) |
72 | return PTR_ERR(pc); | 72 | return PTR_ERR(pc); |
73 | p = to_tcf_nat(pc); | 73 | p = to_tcf_nat(pc); |
74 | ret = ACT_P_CREATED; | 74 | ret = ACT_P_CREATED; |
75 | } else { | 75 | } else { |
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 445bef716f77..50c7c06c019d 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c | |||
@@ -70,7 +70,7 @@ static int tcf_pedit_init(struct nlattr *nla, struct nlattr *est, | |||
70 | pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, | 70 | pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, |
71 | &pedit_idx_gen, &pedit_hash_info); | 71 | &pedit_idx_gen, &pedit_hash_info); |
72 | if (IS_ERR(pc)) | 72 | if (IS_ERR(pc)) |
73 | return PTR_ERR(pc); | 73 | return PTR_ERR(pc); |
74 | p = to_pedit(pc); | 74 | p = to_pedit(pc); |
75 | keys = kmalloc(ksize, GFP_KERNEL); | 75 | keys = kmalloc(ksize, GFP_KERNEL); |
76 | if (keys == NULL) { | 76 | if (keys == NULL) { |
@@ -127,11 +127,9 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, | |||
127 | int i, munged = 0; | 127 | int i, munged = 0; |
128 | unsigned int off; | 128 | unsigned int off; |
129 | 129 | ||
130 | if (skb_cloned(skb)) { | 130 | if (skb_cloned(skb) && |
131 | if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { | 131 | pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) |
132 | return p->tcf_action; | 132 | return p->tcf_action; |
133 | } | ||
134 | } | ||
135 | 133 | ||
136 | off = skb_network_offset(skb); | 134 | off = skb_network_offset(skb); |
137 | 135 | ||
diff --git a/net/sched/act_police.c b/net/sched/act_police.c index e2f08b1e2e58..8a1630774fd6 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c | |||
@@ -22,8 +22,8 @@ | |||
22 | #include <net/act_api.h> | 22 | #include <net/act_api.h> |
23 | #include <net/netlink.h> | 23 | #include <net/netlink.h> |
24 | 24 | ||
25 | #define L2T(p,L) qdisc_l2t((p)->tcfp_R_tab, L) | 25 | #define L2T(p, L) qdisc_l2t((p)->tcfp_R_tab, L) |
26 | #define L2T_P(p,L) qdisc_l2t((p)->tcfp_P_tab, L) | 26 | #define L2T_P(p, L) qdisc_l2t((p)->tcfp_P_tab, L) |
27 | 27 | ||
28 | #define POL_TAB_MASK 15 | 28 | #define POL_TAB_MASK 15 |
29 | static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1]; | 29 | static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1]; |
@@ -37,8 +37,7 @@ static struct tcf_hashinfo police_hash_info = { | |||
37 | }; | 37 | }; |
38 | 38 | ||
39 | /* old policer structure from before tc actions */ | 39 | /* old policer structure from before tc actions */ |
40 | struct tc_police_compat | 40 | struct tc_police_compat { |
41 | { | ||
42 | u32 index; | 41 | u32 index; |
43 | int action; | 42 | int action; |
44 | u32 limit; | 43 | u32 limit; |
@@ -139,7 +138,7 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = { | |||
139 | static int tcf_act_police_locate(struct nlattr *nla, struct nlattr *est, | 138 | static int tcf_act_police_locate(struct nlattr *nla, struct nlattr *est, |
140 | struct tc_action *a, int ovr, int bind) | 139 | struct tc_action *a, int ovr, int bind) |
141 | { | 140 | { |
142 | unsigned h; | 141 | unsigned int h; |
143 | int ret = 0, err; | 142 | int ret = 0, err; |
144 | struct nlattr *tb[TCA_POLICE_MAX + 1]; | 143 | struct nlattr *tb[TCA_POLICE_MAX + 1]; |
145 | struct tc_police *parm; | 144 | struct tc_police *parm; |
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 7287cff7af3e..a34a22de60b3 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c | |||
@@ -47,7 +47,7 @@ static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result | |||
47 | /* print policy string followed by _ then packet count | 47 | /* print policy string followed by _ then packet count |
48 | * Example if this was the 3rd packet and the string was "hello" | 48 | * Example if this was the 3rd packet and the string was "hello" |
49 | * then it would look like "hello_3" (without quotes) | 49 | * then it would look like "hello_3" (without quotes) |
50 | **/ | 50 | */ |
51 | pr_info("simple: %s_%d\n", | 51 | pr_info("simple: %s_%d\n", |
52 | (char *)d->tcfd_defdata, d->tcf_bstats.packets); | 52 | (char *)d->tcfd_defdata, d->tcf_bstats.packets); |
53 | spin_unlock(&d->tcf_lock); | 53 | spin_unlock(&d->tcf_lock); |
@@ -125,7 +125,7 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est, | |||
125 | pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind, | 125 | pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind, |
126 | &simp_idx_gen, &simp_hash_info); | 126 | &simp_idx_gen, &simp_hash_info); |
127 | if (IS_ERR(pc)) | 127 | if (IS_ERR(pc)) |
128 | return PTR_ERR(pc); | 128 | return PTR_ERR(pc); |
129 | 129 | ||
130 | d = to_defact(pc); | 130 | d = to_defact(pc); |
131 | ret = alloc_defdata(d, defdata); | 131 | ret = alloc_defdata(d, defdata); |
@@ -149,7 +149,7 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est, | |||
149 | return ret; | 149 | return ret; |
150 | } | 150 | } |
151 | 151 | ||
152 | static inline int tcf_simp_cleanup(struct tc_action *a, int bind) | 152 | static int tcf_simp_cleanup(struct tc_action *a, int bind) |
153 | { | 153 | { |
154 | struct tcf_defact *d = a->priv; | 154 | struct tcf_defact *d = a->priv; |
155 | 155 | ||
@@ -158,8 +158,8 @@ static inline int tcf_simp_cleanup(struct tc_action *a, int bind) | |||
158 | return 0; | 158 | return 0; |
159 | } | 159 | } |
160 | 160 | ||
161 | static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a, | 161 | static int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a, |
162 | int bind, int ref) | 162 | int bind, int ref) |
163 | { | 163 | { |
164 | unsigned char *b = skb_tail_pointer(skb); | 164 | unsigned char *b = skb_tail_pointer(skb); |
165 | struct tcf_defact *d = a->priv; | 165 | struct tcf_defact *d = a->priv; |
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 836f5fee9e58..5f6f0c7c3905 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c | |||
@@ -113,7 +113,7 @@ static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est, | |||
113 | pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind, | 113 | pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind, |
114 | &skbedit_idx_gen, &skbedit_hash_info); | 114 | &skbedit_idx_gen, &skbedit_hash_info); |
115 | if (IS_ERR(pc)) | 115 | if (IS_ERR(pc)) |
116 | return PTR_ERR(pc); | 116 | return PTR_ERR(pc); |
117 | 117 | ||
118 | d = to_skbedit(pc); | 118 | d = to_skbedit(pc); |
119 | ret = ACT_P_CREATED; | 119 | ret = ACT_P_CREATED; |
@@ -144,7 +144,7 @@ static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est, | |||
144 | return ret; | 144 | return ret; |
145 | } | 145 | } |
146 | 146 | ||
147 | static inline int tcf_skbedit_cleanup(struct tc_action *a, int bind) | 147 | static int tcf_skbedit_cleanup(struct tc_action *a, int bind) |
148 | { | 148 | { |
149 | struct tcf_skbedit *d = a->priv; | 149 | struct tcf_skbedit *d = a->priv; |
150 | 150 | ||
@@ -153,8 +153,8 @@ static inline int tcf_skbedit_cleanup(struct tc_action *a, int bind) | |||
153 | return 0; | 153 | return 0; |
154 | } | 154 | } |
155 | 155 | ||
156 | static inline int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a, | 156 | static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a, |
157 | int bind, int ref) | 157 | int bind, int ref) |
158 | { | 158 | { |
159 | unsigned char *b = skb_tail_pointer(skb); | 159 | unsigned char *b = skb_tail_pointer(skb); |
160 | struct tcf_skbedit *d = a->priv; | 160 | struct tcf_skbedit *d = a->priv; |
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 5fd0c28ef79a..bb2c523f8158 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c | |||
@@ -85,7 +85,7 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops) | |||
85 | int rc = -ENOENT; | 85 | int rc = -ENOENT; |
86 | 86 | ||
87 | write_lock(&cls_mod_lock); | 87 | write_lock(&cls_mod_lock); |
88 | for (tp = &tcf_proto_base; (t=*tp) != NULL; tp = &t->next) | 88 | for (tp = &tcf_proto_base; (t = *tp) != NULL; tp = &t->next) |
89 | if (t == ops) | 89 | if (t == ops) |
90 | break; | 90 | break; |
91 | 91 | ||
@@ -111,7 +111,7 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp) | |||
111 | u32 first = TC_H_MAKE(0xC0000000U, 0U); | 111 | u32 first = TC_H_MAKE(0xC0000000U, 0U); |
112 | 112 | ||
113 | if (tp) | 113 | if (tp) |
114 | first = tp->prio-1; | 114 | first = tp->prio - 1; |
115 | 115 | ||
116 | return first; | 116 | return first; |
117 | } | 117 | } |
@@ -149,7 +149,8 @@ replay: | |||
149 | 149 | ||
150 | if (prio == 0) { | 150 | if (prio == 0) { |
151 | /* If no priority is given, user wants we allocated it. */ | 151 | /* If no priority is given, user wants we allocated it. */ |
152 | if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE)) | 152 | if (n->nlmsg_type != RTM_NEWTFILTER || |
153 | !(n->nlmsg_flags & NLM_F_CREATE)) | ||
153 | return -ENOENT; | 154 | return -ENOENT; |
154 | prio = TC_H_MAKE(0x80000000U, 0U); | 155 | prio = TC_H_MAKE(0x80000000U, 0U); |
155 | } | 156 | } |
@@ -176,7 +177,8 @@ replay: | |||
176 | } | 177 | } |
177 | 178 | ||
178 | /* Is it classful? */ | 179 | /* Is it classful? */ |
179 | if ((cops = q->ops->cl_ops) == NULL) | 180 | cops = q->ops->cl_ops; |
181 | if (!cops) | ||
180 | return -EINVAL; | 182 | return -EINVAL; |
181 | 183 | ||
182 | if (cops->tcf_chain == NULL) | 184 | if (cops->tcf_chain == NULL) |
@@ -196,10 +198,11 @@ replay: | |||
196 | goto errout; | 198 | goto errout; |
197 | 199 | ||
198 | /* Check the chain for existence of proto-tcf with this priority */ | 200 | /* Check the chain for existence of proto-tcf with this priority */ |
199 | for (back = chain; (tp=*back) != NULL; back = &tp->next) { | 201 | for (back = chain; (tp = *back) != NULL; back = &tp->next) { |
200 | if (tp->prio >= prio) { | 202 | if (tp->prio >= prio) { |
201 | if (tp->prio == prio) { | 203 | if (tp->prio == prio) { |
202 | if (!nprio || (tp->protocol != protocol && protocol)) | 204 | if (!nprio || |
205 | (tp->protocol != protocol && protocol)) | ||
203 | goto errout; | 206 | goto errout; |
204 | } else | 207 | } else |
205 | tp = NULL; | 208 | tp = NULL; |
@@ -216,7 +219,8 @@ replay: | |||
216 | goto errout; | 219 | goto errout; |
217 | 220 | ||
218 | err = -ENOENT; | 221 | err = -ENOENT; |
219 | if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE)) | 222 | if (n->nlmsg_type != RTM_NEWTFILTER || |
223 | !(n->nlmsg_flags & NLM_F_CREATE)) | ||
220 | goto errout; | 224 | goto errout; |
221 | 225 | ||
222 | 226 | ||
@@ -420,7 +424,8 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) | |||
420 | 424 | ||
421 | if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) | 425 | if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) |
422 | return skb->len; | 426 | return skb->len; |
423 | if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) | 427 | dev = __dev_get_by_index(net, tcm->tcm_ifindex); |
428 | if (!dev) | ||
424 | return skb->len; | 429 | return skb->len; |
425 | 430 | ||
426 | if (!tcm->tcm_parent) | 431 | if (!tcm->tcm_parent) |
@@ -429,7 +434,8 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) | |||
429 | q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent)); | 434 | q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent)); |
430 | if (!q) | 435 | if (!q) |
431 | goto out; | 436 | goto out; |
432 | if ((cops = q->ops->cl_ops) == NULL) | 437 | cops = q->ops->cl_ops; |
438 | if (!cops) | ||
433 | goto errout; | 439 | goto errout; |
434 | if (cops->tcf_chain == NULL) | 440 | if (cops->tcf_chain == NULL) |
435 | goto errout; | 441 | goto errout; |
@@ -444,8 +450,9 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) | |||
444 | 450 | ||
445 | s_t = cb->args[0]; | 451 | s_t = cb->args[0]; |
446 | 452 | ||
447 | for (tp=*chain, t=0; tp; tp = tp->next, t++) { | 453 | for (tp = *chain, t = 0; tp; tp = tp->next, t++) { |
448 | if (t < s_t) continue; | 454 | if (t < s_t) |
455 | continue; | ||
449 | if (TC_H_MAJ(tcm->tcm_info) && | 456 | if (TC_H_MAJ(tcm->tcm_info) && |
450 | TC_H_MAJ(tcm->tcm_info) != tp->prio) | 457 | TC_H_MAJ(tcm->tcm_info) != tp->prio) |
451 | continue; | 458 | continue; |
@@ -468,10 +475,10 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) | |||
468 | arg.skb = skb; | 475 | arg.skb = skb; |
469 | arg.cb = cb; | 476 | arg.cb = cb; |
470 | arg.w.stop = 0; | 477 | arg.w.stop = 0; |
471 | arg.w.skip = cb->args[1]-1; | 478 | arg.w.skip = cb->args[1] - 1; |
472 | arg.w.count = 0; | 479 | arg.w.count = 0; |
473 | tp->ops->walk(tp, &arg.w); | 480 | tp->ops->walk(tp, &arg.w); |
474 | cb->args[1] = arg.w.count+1; | 481 | cb->args[1] = arg.w.count + 1; |
475 | if (arg.w.stop) | 482 | if (arg.w.stop) |
476 | break; | 483 | break; |
477 | } | 484 | } |
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index f23d9155b1ef..8be8872dd571 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c | |||
@@ -21,14 +21,12 @@ | |||
21 | #include <net/act_api.h> | 21 | #include <net/act_api.h> |
22 | #include <net/pkt_cls.h> | 22 | #include <net/pkt_cls.h> |
23 | 23 | ||
24 | struct basic_head | 24 | struct basic_head { |
25 | { | ||
26 | u32 hgenerator; | 25 | u32 hgenerator; |
27 | struct list_head flist; | 26 | struct list_head flist; |
28 | }; | 27 | }; |
29 | 28 | ||
30 | struct basic_filter | 29 | struct basic_filter { |
31 | { | ||
32 | u32 handle; | 30 | u32 handle; |
33 | struct tcf_exts exts; | 31 | struct tcf_exts exts; |
34 | struct tcf_ematch_tree ematches; | 32 | struct tcf_ematch_tree ematches; |
@@ -92,8 +90,7 @@ static int basic_init(struct tcf_proto *tp) | |||
92 | return 0; | 90 | return 0; |
93 | } | 91 | } |
94 | 92 | ||
95 | static inline void basic_delete_filter(struct tcf_proto *tp, | 93 | static void basic_delete_filter(struct tcf_proto *tp, struct basic_filter *f) |
96 | struct basic_filter *f) | ||
97 | { | 94 | { |
98 | tcf_unbind_filter(tp, &f->res); | 95 | tcf_unbind_filter(tp, &f->res); |
99 | tcf_exts_destroy(tp, &f->exts); | 96 | tcf_exts_destroy(tp, &f->exts); |
@@ -135,9 +132,9 @@ static const struct nla_policy basic_policy[TCA_BASIC_MAX + 1] = { | |||
135 | [TCA_BASIC_EMATCHES] = { .type = NLA_NESTED }, | 132 | [TCA_BASIC_EMATCHES] = { .type = NLA_NESTED }, |
136 | }; | 133 | }; |
137 | 134 | ||
138 | static inline int basic_set_parms(struct tcf_proto *tp, struct basic_filter *f, | 135 | static int basic_set_parms(struct tcf_proto *tp, struct basic_filter *f, |
139 | unsigned long base, struct nlattr **tb, | 136 | unsigned long base, struct nlattr **tb, |
140 | struct nlattr *est) | 137 | struct nlattr *est) |
141 | { | 138 | { |
142 | int err = -EINVAL; | 139 | int err = -EINVAL; |
143 | struct tcf_exts e; | 140 | struct tcf_exts e; |
@@ -203,7 +200,7 @@ static int basic_change(struct tcf_proto *tp, unsigned long base, u32 handle, | |||
203 | } while (--i > 0 && basic_get(tp, head->hgenerator)); | 200 | } while (--i > 0 && basic_get(tp, head->hgenerator)); |
204 | 201 | ||
205 | if (i <= 0) { | 202 | if (i <= 0) { |
206 | printk(KERN_ERR "Insufficient number of handles\n"); | 203 | pr_err("Insufficient number of handles\n"); |
207 | goto errout; | 204 | goto errout; |
208 | } | 205 | } |
209 | 206 | ||
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index d49c40fb7e09..32a335194ca5 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c | |||
@@ -56,7 +56,8 @@ static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss, | |||
56 | { | 56 | { |
57 | struct cgroup_cls_state *cs; | 57 | struct cgroup_cls_state *cs; |
58 | 58 | ||
59 | if (!(cs = kzalloc(sizeof(*cs), GFP_KERNEL))) | 59 | cs = kzalloc(sizeof(*cs), GFP_KERNEL); |
60 | if (!cs) | ||
60 | return ERR_PTR(-ENOMEM); | 61 | return ERR_PTR(-ENOMEM); |
61 | 62 | ||
62 | if (cgrp->parent) | 63 | if (cgrp->parent) |
@@ -94,8 +95,7 @@ static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) | |||
94 | return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files)); | 95 | return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files)); |
95 | } | 96 | } |
96 | 97 | ||
97 | struct cls_cgroup_head | 98 | struct cls_cgroup_head { |
98 | { | ||
99 | u32 handle; | 99 | u32 handle; |
100 | struct tcf_exts exts; | 100 | struct tcf_exts exts; |
101 | struct tcf_ematch_tree ematches; | 101 | struct tcf_ematch_tree ematches; |
@@ -166,7 +166,7 @@ static int cls_cgroup_change(struct tcf_proto *tp, unsigned long base, | |||
166 | u32 handle, struct nlattr **tca, | 166 | u32 handle, struct nlattr **tca, |
167 | unsigned long *arg) | 167 | unsigned long *arg) |
168 | { | 168 | { |
169 | struct nlattr *tb[TCA_CGROUP_MAX+1]; | 169 | struct nlattr *tb[TCA_CGROUP_MAX + 1]; |
170 | struct cls_cgroup_head *head = tp->root; | 170 | struct cls_cgroup_head *head = tp->root; |
171 | struct tcf_ematch_tree t; | 171 | struct tcf_ematch_tree t; |
172 | struct tcf_exts e; | 172 | struct tcf_exts e; |
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 5b271a18bc3a..8ec01391d988 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c | |||
@@ -121,7 +121,7 @@ static u32 flow_get_proto_src(struct sk_buff *skb) | |||
121 | if (!pskb_network_may_pull(skb, sizeof(*iph))) | 121 | if (!pskb_network_may_pull(skb, sizeof(*iph))) |
122 | break; | 122 | break; |
123 | iph = ip_hdr(skb); | 123 | iph = ip_hdr(skb); |
124 | if (iph->frag_off & htons(IP_MF|IP_OFFSET)) | 124 | if (iph->frag_off & htons(IP_MF | IP_OFFSET)) |
125 | break; | 125 | break; |
126 | poff = proto_ports_offset(iph->protocol); | 126 | poff = proto_ports_offset(iph->protocol); |
127 | if (poff >= 0 && | 127 | if (poff >= 0 && |
@@ -163,7 +163,7 @@ static u32 flow_get_proto_dst(struct sk_buff *skb) | |||
163 | if (!pskb_network_may_pull(skb, sizeof(*iph))) | 163 | if (!pskb_network_may_pull(skb, sizeof(*iph))) |
164 | break; | 164 | break; |
165 | iph = ip_hdr(skb); | 165 | iph = ip_hdr(skb); |
166 | if (iph->frag_off & htons(IP_MF|IP_OFFSET)) | 166 | if (iph->frag_off & htons(IP_MF | IP_OFFSET)) |
167 | break; | 167 | break; |
168 | poff = proto_ports_offset(iph->protocol); | 168 | poff = proto_ports_offset(iph->protocol); |
169 | if (poff >= 0 && | 169 | if (poff >= 0 && |
@@ -276,7 +276,7 @@ fallback: | |||
276 | 276 | ||
277 | static u32 flow_get_rtclassid(const struct sk_buff *skb) | 277 | static u32 flow_get_rtclassid(const struct sk_buff *skb) |
278 | { | 278 | { |
279 | #ifdef CONFIG_NET_CLS_ROUTE | 279 | #ifdef CONFIG_IP_ROUTE_CLASSID |
280 | if (skb_dst(skb)) | 280 | if (skb_dst(skb)) |
281 | return skb_dst(skb)->tclassid; | 281 | return skb_dst(skb)->tclassid; |
282 | #endif | 282 | #endif |
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 93b0a7b6f9b4..26e7bc4ffb79 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c | |||
@@ -31,14 +31,12 @@ | |||
31 | 31 | ||
32 | #define HTSIZE (PAGE_SIZE/sizeof(struct fw_filter *)) | 32 | #define HTSIZE (PAGE_SIZE/sizeof(struct fw_filter *)) |
33 | 33 | ||
34 | struct fw_head | 34 | struct fw_head { |
35 | { | ||
36 | struct fw_filter *ht[HTSIZE]; | 35 | struct fw_filter *ht[HTSIZE]; |
37 | u32 mask; | 36 | u32 mask; |
38 | }; | 37 | }; |
39 | 38 | ||
40 | struct fw_filter | 39 | struct fw_filter { |
41 | { | ||
42 | struct fw_filter *next; | 40 | struct fw_filter *next; |
43 | u32 id; | 41 | u32 id; |
44 | struct tcf_result res; | 42 | struct tcf_result res; |
@@ -53,7 +51,7 @@ static const struct tcf_ext_map fw_ext_map = { | |||
53 | .police = TCA_FW_POLICE | 51 | .police = TCA_FW_POLICE |
54 | }; | 52 | }; |
55 | 53 | ||
56 | static __inline__ int fw_hash(u32 handle) | 54 | static inline int fw_hash(u32 handle) |
57 | { | 55 | { |
58 | if (HTSIZE == 4096) | 56 | if (HTSIZE == 4096) |
59 | return ((handle >> 24) & 0xFFF) ^ | 57 | return ((handle >> 24) & 0xFFF) ^ |
@@ -82,14 +80,14 @@ static __inline__ int fw_hash(u32 handle) | |||
82 | static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp, | 80 | static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp, |
83 | struct tcf_result *res) | 81 | struct tcf_result *res) |
84 | { | 82 | { |
85 | struct fw_head *head = (struct fw_head*)tp->root; | 83 | struct fw_head *head = (struct fw_head *)tp->root; |
86 | struct fw_filter *f; | 84 | struct fw_filter *f; |
87 | int r; | 85 | int r; |
88 | u32 id = skb->mark; | 86 | u32 id = skb->mark; |
89 | 87 | ||
90 | if (head != NULL) { | 88 | if (head != NULL) { |
91 | id &= head->mask; | 89 | id &= head->mask; |
92 | for (f=head->ht[fw_hash(id)]; f; f=f->next) { | 90 | for (f = head->ht[fw_hash(id)]; f; f = f->next) { |
93 | if (f->id == id) { | 91 | if (f->id == id) { |
94 | *res = f->res; | 92 | *res = f->res; |
95 | #ifdef CONFIG_NET_CLS_IND | 93 | #ifdef CONFIG_NET_CLS_IND |
@@ -105,7 +103,8 @@ static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp, | |||
105 | } | 103 | } |
106 | } else { | 104 | } else { |
107 | /* old method */ | 105 | /* old method */ |
108 | if (id && (TC_H_MAJ(id) == 0 || !(TC_H_MAJ(id^tp->q->handle)))) { | 106 | if (id && (TC_H_MAJ(id) == 0 || |
107 | !(TC_H_MAJ(id ^ tp->q->handle)))) { | ||
109 | res->classid = id; | 108 | res->classid = id; |
110 | res->class = 0; | 109 | res->class = 0; |
111 | return 0; | 110 | return 0; |
@@ -117,13 +116,13 @@ static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp, | |||
117 | 116 | ||
118 | static unsigned long fw_get(struct tcf_proto *tp, u32 handle) | 117 | static unsigned long fw_get(struct tcf_proto *tp, u32 handle) |
119 | { | 118 | { |
120 | struct fw_head *head = (struct fw_head*)tp->root; | 119 | struct fw_head *head = (struct fw_head *)tp->root; |
121 | struct fw_filter *f; | 120 | struct fw_filter *f; |
122 | 121 | ||
123 | if (head == NULL) | 122 | if (head == NULL) |
124 | return 0; | 123 | return 0; |
125 | 124 | ||
126 | for (f=head->ht[fw_hash(handle)]; f; f=f->next) { | 125 | for (f = head->ht[fw_hash(handle)]; f; f = f->next) { |
127 | if (f->id == handle) | 126 | if (f->id == handle) |
128 | return (unsigned long)f; | 127 | return (unsigned long)f; |
129 | } | 128 | } |
@@ -139,8 +138,7 @@ static int fw_init(struct tcf_proto *tp) | |||
139 | return 0; | 138 | return 0; |
140 | } | 139 | } |
141 | 140 | ||
142 | static inline void | 141 | static void fw_delete_filter(struct tcf_proto *tp, struct fw_filter *f) |
143 | fw_delete_filter(struct tcf_proto *tp, struct fw_filter *f) | ||
144 | { | 142 | { |
145 | tcf_unbind_filter(tp, &f->res); | 143 | tcf_unbind_filter(tp, &f->res); |
146 | tcf_exts_destroy(tp, &f->exts); | 144 | tcf_exts_destroy(tp, &f->exts); |
@@ -156,8 +154,8 @@ static void fw_destroy(struct tcf_proto *tp) | |||
156 | if (head == NULL) | 154 | if (head == NULL) |
157 | return; | 155 | return; |
158 | 156 | ||
159 | for (h=0; h<HTSIZE; h++) { | 157 | for (h = 0; h < HTSIZE; h++) { |
160 | while ((f=head->ht[h]) != NULL) { | 158 | while ((f = head->ht[h]) != NULL) { |
161 | head->ht[h] = f->next; | 159 | head->ht[h] = f->next; |
162 | fw_delete_filter(tp, f); | 160 | fw_delete_filter(tp, f); |
163 | } | 161 | } |
@@ -167,14 +165,14 @@ static void fw_destroy(struct tcf_proto *tp) | |||
167 | 165 | ||
168 | static int fw_delete(struct tcf_proto *tp, unsigned long arg) | 166 | static int fw_delete(struct tcf_proto *tp, unsigned long arg) |
169 | { | 167 | { |
170 | struct fw_head *head = (struct fw_head*)tp->root; | 168 | struct fw_head *head = (struct fw_head *)tp->root; |
171 | struct fw_filter *f = (struct fw_filter*)arg; | 169 | struct fw_filter *f = (struct fw_filter *)arg; |
172 | struct fw_filter **fp; | 170 | struct fw_filter **fp; |
173 | 171 | ||
174 | if (head == NULL || f == NULL) | 172 | if (head == NULL || f == NULL) |
175 | goto out; | 173 | goto out; |
176 | 174 | ||
177 | for (fp=&head->ht[fw_hash(f->id)]; *fp; fp = &(*fp)->next) { | 175 | for (fp = &head->ht[fw_hash(f->id)]; *fp; fp = &(*fp)->next) { |
178 | if (*fp == f) { | 176 | if (*fp == f) { |
179 | tcf_tree_lock(tp); | 177 | tcf_tree_lock(tp); |
180 | *fp = f->next; | 178 | *fp = f->next; |
@@ -240,7 +238,7 @@ static int fw_change(struct tcf_proto *tp, unsigned long base, | |||
240 | struct nlattr **tca, | 238 | struct nlattr **tca, |
241 | unsigned long *arg) | 239 | unsigned long *arg) |
242 | { | 240 | { |
243 | struct fw_head *head = (struct fw_head*)tp->root; | 241 | struct fw_head *head = (struct fw_head *)tp->root; |
244 | struct fw_filter *f = (struct fw_filter *) *arg; | 242 | struct fw_filter *f = (struct fw_filter *) *arg; |
245 | struct nlattr *opt = tca[TCA_OPTIONS]; | 243 | struct nlattr *opt = tca[TCA_OPTIONS]; |
246 | struct nlattr *tb[TCA_FW_MAX + 1]; | 244 | struct nlattr *tb[TCA_FW_MAX + 1]; |
@@ -302,7 +300,7 @@ errout: | |||
302 | 300 | ||
303 | static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg) | 301 | static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg) |
304 | { | 302 | { |
305 | struct fw_head *head = (struct fw_head*)tp->root; | 303 | struct fw_head *head = (struct fw_head *)tp->root; |
306 | int h; | 304 | int h; |
307 | 305 | ||
308 | if (head == NULL) | 306 | if (head == NULL) |
@@ -332,7 +330,7 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh, | |||
332 | struct sk_buff *skb, struct tcmsg *t) | 330 | struct sk_buff *skb, struct tcmsg *t) |
333 | { | 331 | { |
334 | struct fw_head *head = (struct fw_head *)tp->root; | 332 | struct fw_head *head = (struct fw_head *)tp->root; |
335 | struct fw_filter *f = (struct fw_filter*)fh; | 333 | struct fw_filter *f = (struct fw_filter *)fh; |
336 | unsigned char *b = skb_tail_pointer(skb); | 334 | unsigned char *b = skb_tail_pointer(skb); |
337 | struct nlattr *nest; | 335 | struct nlattr *nest; |
338 | 336 | ||
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 694dcd85dec8..d580cdfca093 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c | |||
@@ -23,34 +23,30 @@ | |||
23 | #include <net/pkt_cls.h> | 23 | #include <net/pkt_cls.h> |
24 | 24 | ||
25 | /* | 25 | /* |
26 | 1. For now we assume that route tags < 256. | 26 | * 1. For now we assume that route tags < 256. |
27 | It allows to use direct table lookups, instead of hash tables. | 27 | * It allows to use direct table lookups, instead of hash tables. |
28 | 2. For now we assume that "from TAG" and "fromdev DEV" statements | 28 | * 2. For now we assume that "from TAG" and "fromdev DEV" statements |
29 | are mutually exclusive. | 29 | * are mutually exclusive. |
30 | 3. "to TAG from ANY" has higher priority, than "to ANY from XXX" | 30 | * 3. "to TAG from ANY" has higher priority, than "to ANY from XXX" |
31 | */ | 31 | */ |
32 | 32 | ||
33 | struct route4_fastmap | 33 | struct route4_fastmap { |
34 | { | ||
35 | struct route4_filter *filter; | 34 | struct route4_filter *filter; |
36 | u32 id; | 35 | u32 id; |
37 | int iif; | 36 | int iif; |
38 | }; | 37 | }; |
39 | 38 | ||
40 | struct route4_head | 39 | struct route4_head { |
41 | { | ||
42 | struct route4_fastmap fastmap[16]; | 40 | struct route4_fastmap fastmap[16]; |
43 | struct route4_bucket *table[256+1]; | 41 | struct route4_bucket *table[256 + 1]; |
44 | }; | 42 | }; |
45 | 43 | ||
46 | struct route4_bucket | 44 | struct route4_bucket { |
47 | { | ||
48 | /* 16 FROM buckets + 16 IIF buckets + 1 wildcard bucket */ | 45 | /* 16 FROM buckets + 16 IIF buckets + 1 wildcard bucket */ |
49 | struct route4_filter *ht[16+16+1]; | 46 | struct route4_filter *ht[16 + 16 + 1]; |
50 | }; | 47 | }; |
51 | 48 | ||
52 | struct route4_filter | 49 | struct route4_filter { |
53 | { | ||
54 | struct route4_filter *next; | 50 | struct route4_filter *next; |
55 | u32 id; | 51 | u32 id; |
56 | int iif; | 52 | int iif; |
@@ -61,20 +57,20 @@ struct route4_filter | |||
61 | struct route4_bucket *bkt; | 57 | struct route4_bucket *bkt; |
62 | }; | 58 | }; |
63 | 59 | ||
64 | #define ROUTE4_FAILURE ((struct route4_filter*)(-1L)) | 60 | #define ROUTE4_FAILURE ((struct route4_filter *)(-1L)) |
65 | 61 | ||
66 | static const struct tcf_ext_map route_ext_map = { | 62 | static const struct tcf_ext_map route_ext_map = { |
67 | .police = TCA_ROUTE4_POLICE, | 63 | .police = TCA_ROUTE4_POLICE, |
68 | .action = TCA_ROUTE4_ACT | 64 | .action = TCA_ROUTE4_ACT |
69 | }; | 65 | }; |
70 | 66 | ||
71 | static __inline__ int route4_fastmap_hash(u32 id, int iif) | 67 | static inline int route4_fastmap_hash(u32 id, int iif) |
72 | { | 68 | { |
73 | return id&0xF; | 69 | return id & 0xF; |
74 | } | 70 | } |
75 | 71 | ||
76 | static inline | 72 | static void |
77 | void route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id) | 73 | route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id) |
78 | { | 74 | { |
79 | spinlock_t *root_lock = qdisc_root_sleeping_lock(q); | 75 | spinlock_t *root_lock = qdisc_root_sleeping_lock(q); |
80 | 76 | ||
@@ -83,32 +79,33 @@ void route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id) | |||
83 | spin_unlock_bh(root_lock); | 79 | spin_unlock_bh(root_lock); |
84 | } | 80 | } |
85 | 81 | ||
86 | static inline void | 82 | static void |
87 | route4_set_fastmap(struct route4_head *head, u32 id, int iif, | 83 | route4_set_fastmap(struct route4_head *head, u32 id, int iif, |
88 | struct route4_filter *f) | 84 | struct route4_filter *f) |
89 | { | 85 | { |
90 | int h = route4_fastmap_hash(id, iif); | 86 | int h = route4_fastmap_hash(id, iif); |
87 | |||
91 | head->fastmap[h].id = id; | 88 | head->fastmap[h].id = id; |
92 | head->fastmap[h].iif = iif; | 89 | head->fastmap[h].iif = iif; |
93 | head->fastmap[h].filter = f; | 90 | head->fastmap[h].filter = f; |
94 | } | 91 | } |
95 | 92 | ||
96 | static __inline__ int route4_hash_to(u32 id) | 93 | static inline int route4_hash_to(u32 id) |
97 | { | 94 | { |
98 | return id&0xFF; | 95 | return id & 0xFF; |
99 | } | 96 | } |
100 | 97 | ||
101 | static __inline__ int route4_hash_from(u32 id) | 98 | static inline int route4_hash_from(u32 id) |
102 | { | 99 | { |
103 | return (id>>16)&0xF; | 100 | return (id >> 16) & 0xF; |
104 | } | 101 | } |
105 | 102 | ||
106 | static __inline__ int route4_hash_iif(int iif) | 103 | static inline int route4_hash_iif(int iif) |
107 | { | 104 | { |
108 | return 16 + ((iif>>16)&0xF); | 105 | return 16 + ((iif >> 16) & 0xF); |
109 | } | 106 | } |
110 | 107 | ||
111 | static __inline__ int route4_hash_wild(void) | 108 | static inline int route4_hash_wild(void) |
112 | { | 109 | { |
113 | return 32; | 110 | return 32; |
114 | } | 111 | } |
@@ -131,21 +128,22 @@ static __inline__ int route4_hash_wild(void) | |||
131 | static int route4_classify(struct sk_buff *skb, struct tcf_proto *tp, | 128 | static int route4_classify(struct sk_buff *skb, struct tcf_proto *tp, |
132 | struct tcf_result *res) | 129 | struct tcf_result *res) |
133 | { | 130 | { |
134 | struct route4_head *head = (struct route4_head*)tp->root; | 131 | struct route4_head *head = (struct route4_head *)tp->root; |
135 | struct dst_entry *dst; | 132 | struct dst_entry *dst; |
136 | struct route4_bucket *b; | 133 | struct route4_bucket *b; |
137 | struct route4_filter *f; | 134 | struct route4_filter *f; |
138 | u32 id, h; | 135 | u32 id, h; |
139 | int iif, dont_cache = 0; | 136 | int iif, dont_cache = 0; |
140 | 137 | ||
141 | if ((dst = skb_dst(skb)) == NULL) | 138 | dst = skb_dst(skb); |
139 | if (!dst) | ||
142 | goto failure; | 140 | goto failure; |
143 | 141 | ||
144 | id = dst->tclassid; | 142 | id = dst->tclassid; |
145 | if (head == NULL) | 143 | if (head == NULL) |
146 | goto old_method; | 144 | goto old_method; |
147 | 145 | ||
148 | iif = ((struct rtable*)dst)->fl.iif; | 146 | iif = ((struct rtable *)dst)->fl.iif; |
149 | 147 | ||
150 | h = route4_fastmap_hash(id, iif); | 148 | h = route4_fastmap_hash(id, iif); |
151 | if (id == head->fastmap[h].id && | 149 | if (id == head->fastmap[h].id && |
@@ -161,7 +159,8 @@ static int route4_classify(struct sk_buff *skb, struct tcf_proto *tp, | |||
161 | h = route4_hash_to(id); | 159 | h = route4_hash_to(id); |
162 | 160 | ||
163 | restart: | 161 | restart: |
164 | if ((b = head->table[h]) != NULL) { | 162 | b = head->table[h]; |
163 | if (b) { | ||
165 | for (f = b->ht[route4_hash_from(id)]; f; f = f->next) | 164 | for (f = b->ht[route4_hash_from(id)]; f; f = f->next) |
166 | if (f->id == id) | 165 | if (f->id == id) |
167 | ROUTE4_APPLY_RESULT(); | 166 | ROUTE4_APPLY_RESULT(); |
@@ -197,8 +196,9 @@ old_method: | |||
197 | 196 | ||
198 | static inline u32 to_hash(u32 id) | 197 | static inline u32 to_hash(u32 id) |
199 | { | 198 | { |
200 | u32 h = id&0xFF; | 199 | u32 h = id & 0xFF; |
201 | if (id&0x8000) | 200 | |
201 | if (id & 0x8000) | ||
202 | h += 256; | 202 | h += 256; |
203 | return h; | 203 | return h; |
204 | } | 204 | } |
@@ -211,17 +211,17 @@ static inline u32 from_hash(u32 id) | |||
211 | if (!(id & 0x8000)) { | 211 | if (!(id & 0x8000)) { |
212 | if (id > 255) | 212 | if (id > 255) |
213 | return 256; | 213 | return 256; |
214 | return id&0xF; | 214 | return id & 0xF; |
215 | } | 215 | } |
216 | return 16 + (id&0xF); | 216 | return 16 + (id & 0xF); |
217 | } | 217 | } |
218 | 218 | ||
219 | static unsigned long route4_get(struct tcf_proto *tp, u32 handle) | 219 | static unsigned long route4_get(struct tcf_proto *tp, u32 handle) |
220 | { | 220 | { |
221 | struct route4_head *head = (struct route4_head*)tp->root; | 221 | struct route4_head *head = (struct route4_head *)tp->root; |
222 | struct route4_bucket *b; | 222 | struct route4_bucket *b; |
223 | struct route4_filter *f; | 223 | struct route4_filter *f; |
224 | unsigned h1, h2; | 224 | unsigned int h1, h2; |
225 | 225 | ||
226 | if (!head) | 226 | if (!head) |
227 | return 0; | 227 | return 0; |
@@ -230,11 +230,12 @@ static unsigned long route4_get(struct tcf_proto *tp, u32 handle) | |||
230 | if (h1 > 256) | 230 | if (h1 > 256) |
231 | return 0; | 231 | return 0; |
232 | 232 | ||
233 | h2 = from_hash(handle>>16); | 233 | h2 = from_hash(handle >> 16); |
234 | if (h2 > 32) | 234 | if (h2 > 32) |
235 | return 0; | 235 | return 0; |
236 | 236 | ||
237 | if ((b = head->table[h1]) != NULL) { | 237 | b = head->table[h1]; |
238 | if (b) { | ||
238 | for (f = b->ht[h2]; f; f = f->next) | 239 | for (f = b->ht[h2]; f; f = f->next) |
239 | if (f->handle == handle) | 240 | if (f->handle == handle) |
240 | return (unsigned long)f; | 241 | return (unsigned long)f; |
@@ -251,7 +252,7 @@ static int route4_init(struct tcf_proto *tp) | |||
251 | return 0; | 252 | return 0; |
252 | } | 253 | } |
253 | 254 | ||
254 | static inline void | 255 | static void |
255 | route4_delete_filter(struct tcf_proto *tp, struct route4_filter *f) | 256 | route4_delete_filter(struct tcf_proto *tp, struct route4_filter *f) |
256 | { | 257 | { |
257 | tcf_unbind_filter(tp, &f->res); | 258 | tcf_unbind_filter(tp, &f->res); |
@@ -267,11 +268,12 @@ static void route4_destroy(struct tcf_proto *tp) | |||
267 | if (head == NULL) | 268 | if (head == NULL) |
268 | return; | 269 | return; |
269 | 270 | ||
270 | for (h1=0; h1<=256; h1++) { | 271 | for (h1 = 0; h1 <= 256; h1++) { |
271 | struct route4_bucket *b; | 272 | struct route4_bucket *b; |
272 | 273 | ||
273 | if ((b = head->table[h1]) != NULL) { | 274 | b = head->table[h1]; |
274 | for (h2=0; h2<=32; h2++) { | 275 | if (b) { |
276 | for (h2 = 0; h2 <= 32; h2++) { | ||
275 | struct route4_filter *f; | 277 | struct route4_filter *f; |
276 | 278 | ||
277 | while ((f = b->ht[h2]) != NULL) { | 279 | while ((f = b->ht[h2]) != NULL) { |
@@ -287,9 +289,9 @@ static void route4_destroy(struct tcf_proto *tp) | |||
287 | 289 | ||
288 | static int route4_delete(struct tcf_proto *tp, unsigned long arg) | 290 | static int route4_delete(struct tcf_proto *tp, unsigned long arg) |
289 | { | 291 | { |
290 | struct route4_head *head = (struct route4_head*)tp->root; | 292 | struct route4_head *head = (struct route4_head *)tp->root; |
291 | struct route4_filter **fp, *f = (struct route4_filter*)arg; | 293 | struct route4_filter **fp, *f = (struct route4_filter *)arg; |
292 | unsigned h = 0; | 294 | unsigned int h = 0; |
293 | struct route4_bucket *b; | 295 | struct route4_bucket *b; |
294 | int i; | 296 | int i; |
295 | 297 | ||
@@ -299,7 +301,7 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg) | |||
299 | h = f->handle; | 301 | h = f->handle; |
300 | b = f->bkt; | 302 | b = f->bkt; |
301 | 303 | ||
302 | for (fp = &b->ht[from_hash(h>>16)]; *fp; fp = &(*fp)->next) { | 304 | for (fp = &b->ht[from_hash(h >> 16)]; *fp; fp = &(*fp)->next) { |
303 | if (*fp == f) { | 305 | if (*fp == f) { |
304 | tcf_tree_lock(tp); | 306 | tcf_tree_lock(tp); |
305 | *fp = f->next; | 307 | *fp = f->next; |
@@ -310,7 +312,7 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg) | |||
310 | 312 | ||
311 | /* Strip tree */ | 313 | /* Strip tree */ |
312 | 314 | ||
313 | for (i=0; i<=32; i++) | 315 | for (i = 0; i <= 32; i++) |
314 | if (b->ht[i]) | 316 | if (b->ht[i]) |
315 | return 0; | 317 | return 0; |
316 | 318 | ||
@@ -380,7 +382,8 @@ static int route4_set_parms(struct tcf_proto *tp, unsigned long base, | |||
380 | } | 382 | } |
381 | 383 | ||
382 | h1 = to_hash(nhandle); | 384 | h1 = to_hash(nhandle); |
383 | if ((b = head->table[h1]) == NULL) { | 385 | b = head->table[h1]; |
386 | if (!b) { | ||
384 | err = -ENOBUFS; | 387 | err = -ENOBUFS; |
385 | b = kzalloc(sizeof(struct route4_bucket), GFP_KERNEL); | 388 | b = kzalloc(sizeof(struct route4_bucket), GFP_KERNEL); |
386 | if (b == NULL) | 389 | if (b == NULL) |
@@ -391,6 +394,7 @@ static int route4_set_parms(struct tcf_proto *tp, unsigned long base, | |||
391 | tcf_tree_unlock(tp); | 394 | tcf_tree_unlock(tp); |
392 | } else { | 395 | } else { |
393 | unsigned int h2 = from_hash(nhandle >> 16); | 396 | unsigned int h2 = from_hash(nhandle >> 16); |
397 | |||
394 | err = -EEXIST; | 398 | err = -EEXIST; |
395 | for (fp = b->ht[h2]; fp; fp = fp->next) | 399 | for (fp = b->ht[h2]; fp; fp = fp->next) |
396 | if (fp->handle == f->handle) | 400 | if (fp->handle == f->handle) |
@@ -444,7 +448,8 @@ static int route4_change(struct tcf_proto *tp, unsigned long base, | |||
444 | if (err < 0) | 448 | if (err < 0) |
445 | return err; | 449 | return err; |
446 | 450 | ||
447 | if ((f = (struct route4_filter*)*arg) != NULL) { | 451 | f = (struct route4_filter *)*arg; |
452 | if (f) { | ||
448 | if (f->handle != handle && handle) | 453 | if (f->handle != handle && handle) |
449 | return -EINVAL; | 454 | return -EINVAL; |
450 | 455 | ||
@@ -481,7 +486,7 @@ static int route4_change(struct tcf_proto *tp, unsigned long base, | |||
481 | 486 | ||
482 | reinsert: | 487 | reinsert: |
483 | h = from_hash(f->handle >> 16); | 488 | h = from_hash(f->handle >> 16); |
484 | for (fp = &f->bkt->ht[h]; (f1=*fp) != NULL; fp = &f1->next) | 489 | for (fp = &f->bkt->ht[h]; (f1 = *fp) != NULL; fp = &f1->next) |
485 | if (f->handle < f1->handle) | 490 | if (f->handle < f1->handle) |
486 | break; | 491 | break; |
487 | 492 | ||
@@ -492,7 +497,8 @@ reinsert: | |||
492 | if (old_handle && f->handle != old_handle) { | 497 | if (old_handle && f->handle != old_handle) { |
493 | th = to_hash(old_handle); | 498 | th = to_hash(old_handle); |
494 | h = from_hash(old_handle >> 16); | 499 | h = from_hash(old_handle >> 16); |
495 | if ((b = head->table[th]) != NULL) { | 500 | b = head->table[th]; |
501 | if (b) { | ||
496 | for (fp = &b->ht[h]; *fp; fp = &(*fp)->next) { | 502 | for (fp = &b->ht[h]; *fp; fp = &(*fp)->next) { |
497 | if (*fp == f) { | 503 | if (*fp == f) { |
498 | *fp = f->next; | 504 | *fp = f->next; |
@@ -515,7 +521,7 @@ errout: | |||
515 | static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg) | 521 | static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg) |
516 | { | 522 | { |
517 | struct route4_head *head = tp->root; | 523 | struct route4_head *head = tp->root; |
518 | unsigned h, h1; | 524 | unsigned int h, h1; |
519 | 525 | ||
520 | if (head == NULL) | 526 | if (head == NULL) |
521 | arg->stop = 1; | 527 | arg->stop = 1; |
@@ -549,7 +555,7 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg) | |||
549 | static int route4_dump(struct tcf_proto *tp, unsigned long fh, | 555 | static int route4_dump(struct tcf_proto *tp, unsigned long fh, |
550 | struct sk_buff *skb, struct tcmsg *t) | 556 | struct sk_buff *skb, struct tcmsg *t) |
551 | { | 557 | { |
552 | struct route4_filter *f = (struct route4_filter*)fh; | 558 | struct route4_filter *f = (struct route4_filter *)fh; |
553 | unsigned char *b = skb_tail_pointer(skb); | 559 | unsigned char *b = skb_tail_pointer(skb); |
554 | struct nlattr *nest; | 560 | struct nlattr *nest; |
555 | u32 id; | 561 | u32 id; |
@@ -563,15 +569,15 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh, | |||
563 | if (nest == NULL) | 569 | if (nest == NULL) |
564 | goto nla_put_failure; | 570 | goto nla_put_failure; |
565 | 571 | ||
566 | if (!(f->handle&0x8000)) { | 572 | if (!(f->handle & 0x8000)) { |
567 | id = f->id&0xFF; | 573 | id = f->id & 0xFF; |
568 | NLA_PUT_U32(skb, TCA_ROUTE4_TO, id); | 574 | NLA_PUT_U32(skb, TCA_ROUTE4_TO, id); |
569 | } | 575 | } |
570 | if (f->handle&0x80000000) { | 576 | if (f->handle & 0x80000000) { |
571 | if ((f->handle>>16) != 0xFFFF) | 577 | if ((f->handle >> 16) != 0xFFFF) |
572 | NLA_PUT_U32(skb, TCA_ROUTE4_IIF, f->iif); | 578 | NLA_PUT_U32(skb, TCA_ROUTE4_IIF, f->iif); |
573 | } else { | 579 | } else { |
574 | id = f->id>>16; | 580 | id = f->id >> 16; |
575 | NLA_PUT_U32(skb, TCA_ROUTE4_FROM, id); | 581 | NLA_PUT_U32(skb, TCA_ROUTE4_FROM, id); |
576 | } | 582 | } |
577 | if (f->res.classid) | 583 | if (f->res.classid) |
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 425a1790b048..402c44b241a3 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h | |||
@@ -66,28 +66,25 @@ | |||
66 | powerful classification engine. */ | 66 | powerful classification engine. */ |
67 | 67 | ||
68 | 68 | ||
69 | struct rsvp_head | 69 | struct rsvp_head { |
70 | { | ||
71 | u32 tmap[256/32]; | 70 | u32 tmap[256/32]; |
72 | u32 hgenerator; | 71 | u32 hgenerator; |
73 | u8 tgenerator; | 72 | u8 tgenerator; |
74 | struct rsvp_session *ht[256]; | 73 | struct rsvp_session *ht[256]; |
75 | }; | 74 | }; |
76 | 75 | ||
77 | struct rsvp_session | 76 | struct rsvp_session { |
78 | { | ||
79 | struct rsvp_session *next; | 77 | struct rsvp_session *next; |
80 | __be32 dst[RSVP_DST_LEN]; | 78 | __be32 dst[RSVP_DST_LEN]; |
81 | struct tc_rsvp_gpi dpi; | 79 | struct tc_rsvp_gpi dpi; |
82 | u8 protocol; | 80 | u8 protocol; |
83 | u8 tunnelid; | 81 | u8 tunnelid; |
84 | /* 16 (src,sport) hash slots, and one wildcard source slot */ | 82 | /* 16 (src,sport) hash slots, and one wildcard source slot */ |
85 | struct rsvp_filter *ht[16+1]; | 83 | struct rsvp_filter *ht[16 + 1]; |
86 | }; | 84 | }; |
87 | 85 | ||
88 | 86 | ||
89 | struct rsvp_filter | 87 | struct rsvp_filter { |
90 | { | ||
91 | struct rsvp_filter *next; | 88 | struct rsvp_filter *next; |
92 | __be32 src[RSVP_DST_LEN]; | 89 | __be32 src[RSVP_DST_LEN]; |
93 | struct tc_rsvp_gpi spi; | 90 | struct tc_rsvp_gpi spi; |
@@ -100,17 +97,19 @@ struct rsvp_filter | |||
100 | struct rsvp_session *sess; | 97 | struct rsvp_session *sess; |
101 | }; | 98 | }; |
102 | 99 | ||
103 | static __inline__ unsigned hash_dst(__be32 *dst, u8 protocol, u8 tunnelid) | 100 | static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid) |
104 | { | 101 | { |
105 | unsigned h = (__force __u32)dst[RSVP_DST_LEN-1]; | 102 | unsigned int h = (__force __u32)dst[RSVP_DST_LEN - 1]; |
103 | |||
106 | h ^= h>>16; | 104 | h ^= h>>16; |
107 | h ^= h>>8; | 105 | h ^= h>>8; |
108 | return (h ^ protocol ^ tunnelid) & 0xFF; | 106 | return (h ^ protocol ^ tunnelid) & 0xFF; |
109 | } | 107 | } |
110 | 108 | ||
111 | static __inline__ unsigned hash_src(__be32 *src) | 109 | static inline unsigned int hash_src(__be32 *src) |
112 | { | 110 | { |
113 | unsigned h = (__force __u32)src[RSVP_DST_LEN-1]; | 111 | unsigned int h = (__force __u32)src[RSVP_DST_LEN-1]; |
112 | |||
114 | h ^= h>>16; | 113 | h ^= h>>16; |
115 | h ^= h>>8; | 114 | h ^= h>>8; |
116 | h ^= h>>4; | 115 | h ^= h>>4; |
@@ -134,10 +133,10 @@ static struct tcf_ext_map rsvp_ext_map = { | |||
134 | static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp, | 133 | static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp, |
135 | struct tcf_result *res) | 134 | struct tcf_result *res) |
136 | { | 135 | { |
137 | struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht; | 136 | struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht; |
138 | struct rsvp_session *s; | 137 | struct rsvp_session *s; |
139 | struct rsvp_filter *f; | 138 | struct rsvp_filter *f; |
140 | unsigned h1, h2; | 139 | unsigned int h1, h2; |
141 | __be32 *dst, *src; | 140 | __be32 *dst, *src; |
142 | u8 protocol; | 141 | u8 protocol; |
143 | u8 tunnelid = 0; | 142 | u8 tunnelid = 0; |
@@ -162,13 +161,13 @@ restart: | |||
162 | src = &nhptr->saddr.s6_addr32[0]; | 161 | src = &nhptr->saddr.s6_addr32[0]; |
163 | dst = &nhptr->daddr.s6_addr32[0]; | 162 | dst = &nhptr->daddr.s6_addr32[0]; |
164 | protocol = nhptr->nexthdr; | 163 | protocol = nhptr->nexthdr; |
165 | xprt = ((u8*)nhptr) + sizeof(struct ipv6hdr); | 164 | xprt = ((u8 *)nhptr) + sizeof(struct ipv6hdr); |
166 | #else | 165 | #else |
167 | src = &nhptr->saddr; | 166 | src = &nhptr->saddr; |
168 | dst = &nhptr->daddr; | 167 | dst = &nhptr->daddr; |
169 | protocol = nhptr->protocol; | 168 | protocol = nhptr->protocol; |
170 | xprt = ((u8*)nhptr) + (nhptr->ihl<<2); | 169 | xprt = ((u8 *)nhptr) + (nhptr->ihl<<2); |
171 | if (nhptr->frag_off & htons(IP_MF|IP_OFFSET)) | 170 | if (nhptr->frag_off & htons(IP_MF | IP_OFFSET)) |
172 | return -1; | 171 | return -1; |
173 | #endif | 172 | #endif |
174 | 173 | ||
@@ -176,10 +175,10 @@ restart: | |||
176 | h2 = hash_src(src); | 175 | h2 = hash_src(src); |
177 | 176 | ||
178 | for (s = sht[h1]; s; s = s->next) { | 177 | for (s = sht[h1]; s; s = s->next) { |
179 | if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] && | 178 | if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN - 1] && |
180 | protocol == s->protocol && | 179 | protocol == s->protocol && |
181 | !(s->dpi.mask & | 180 | !(s->dpi.mask & |
182 | (*(u32*)(xprt+s->dpi.offset)^s->dpi.key)) && | 181 | (*(u32 *)(xprt + s->dpi.offset) ^ s->dpi.key)) && |
183 | #if RSVP_DST_LEN == 4 | 182 | #if RSVP_DST_LEN == 4 |
184 | dst[0] == s->dst[0] && | 183 | dst[0] == s->dst[0] && |
185 | dst[1] == s->dst[1] && | 184 | dst[1] == s->dst[1] && |
@@ -188,8 +187,8 @@ restart: | |||
188 | tunnelid == s->tunnelid) { | 187 | tunnelid == s->tunnelid) { |
189 | 188 | ||
190 | for (f = s->ht[h2]; f; f = f->next) { | 189 | for (f = s->ht[h2]; f; f = f->next) { |
191 | if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN-1] && | 190 | if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN - 1] && |
192 | !(f->spi.mask & (*(u32*)(xprt+f->spi.offset)^f->spi.key)) | 191 | !(f->spi.mask & (*(u32 *)(xprt + f->spi.offset) ^ f->spi.key)) |
193 | #if RSVP_DST_LEN == 4 | 192 | #if RSVP_DST_LEN == 4 |
194 | && | 193 | && |
195 | src[0] == f->src[0] && | 194 | src[0] == f->src[0] && |
@@ -205,7 +204,7 @@ matched: | |||
205 | return 0; | 204 | return 0; |
206 | 205 | ||
207 | tunnelid = f->res.classid; | 206 | tunnelid = f->res.classid; |
208 | nhptr = (void*)(xprt + f->tunnelhdr - sizeof(*nhptr)); | 207 | nhptr = (void *)(xprt + f->tunnelhdr - sizeof(*nhptr)); |
209 | goto restart; | 208 | goto restart; |
210 | } | 209 | } |
211 | } | 210 | } |
@@ -224,11 +223,11 @@ matched: | |||
224 | 223 | ||
225 | static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle) | 224 | static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle) |
226 | { | 225 | { |
227 | struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht; | 226 | struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht; |
228 | struct rsvp_session *s; | 227 | struct rsvp_session *s; |
229 | struct rsvp_filter *f; | 228 | struct rsvp_filter *f; |
230 | unsigned h1 = handle&0xFF; | 229 | unsigned int h1 = handle & 0xFF; |
231 | unsigned h2 = (handle>>8)&0xFF; | 230 | unsigned int h2 = (handle >> 8) & 0xFF; |
232 | 231 | ||
233 | if (h2 > 16) | 232 | if (h2 > 16) |
234 | return 0; | 233 | return 0; |
@@ -258,7 +257,7 @@ static int rsvp_init(struct tcf_proto *tp) | |||
258 | return -ENOBUFS; | 257 | return -ENOBUFS; |
259 | } | 258 | } |
260 | 259 | ||
261 | static inline void | 260 | static void |
262 | rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f) | 261 | rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f) |
263 | { | 262 | { |
264 | tcf_unbind_filter(tp, &f->res); | 263 | tcf_unbind_filter(tp, &f->res); |
@@ -277,13 +276,13 @@ static void rsvp_destroy(struct tcf_proto *tp) | |||
277 | 276 | ||
278 | sht = data->ht; | 277 | sht = data->ht; |
279 | 278 | ||
280 | for (h1=0; h1<256; h1++) { | 279 | for (h1 = 0; h1 < 256; h1++) { |
281 | struct rsvp_session *s; | 280 | struct rsvp_session *s; |
282 | 281 | ||
283 | while ((s = sht[h1]) != NULL) { | 282 | while ((s = sht[h1]) != NULL) { |
284 | sht[h1] = s->next; | 283 | sht[h1] = s->next; |
285 | 284 | ||
286 | for (h2=0; h2<=16; h2++) { | 285 | for (h2 = 0; h2 <= 16; h2++) { |
287 | struct rsvp_filter *f; | 286 | struct rsvp_filter *f; |
288 | 287 | ||
289 | while ((f = s->ht[h2]) != NULL) { | 288 | while ((f = s->ht[h2]) != NULL) { |
@@ -299,13 +298,13 @@ static void rsvp_destroy(struct tcf_proto *tp) | |||
299 | 298 | ||
300 | static int rsvp_delete(struct tcf_proto *tp, unsigned long arg) | 299 | static int rsvp_delete(struct tcf_proto *tp, unsigned long arg) |
301 | { | 300 | { |
302 | struct rsvp_filter **fp, *f = (struct rsvp_filter*)arg; | 301 | struct rsvp_filter **fp, *f = (struct rsvp_filter *)arg; |
303 | unsigned h = f->handle; | 302 | unsigned int h = f->handle; |
304 | struct rsvp_session **sp; | 303 | struct rsvp_session **sp; |
305 | struct rsvp_session *s = f->sess; | 304 | struct rsvp_session *s = f->sess; |
306 | int i; | 305 | int i; |
307 | 306 | ||
308 | for (fp = &s->ht[(h>>8)&0xFF]; *fp; fp = &(*fp)->next) { | 307 | for (fp = &s->ht[(h >> 8) & 0xFF]; *fp; fp = &(*fp)->next) { |
309 | if (*fp == f) { | 308 | if (*fp == f) { |
310 | tcf_tree_lock(tp); | 309 | tcf_tree_lock(tp); |
311 | *fp = f->next; | 310 | *fp = f->next; |
@@ -314,12 +313,12 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg) | |||
314 | 313 | ||
315 | /* Strip tree */ | 314 | /* Strip tree */ |
316 | 315 | ||
317 | for (i=0; i<=16; i++) | 316 | for (i = 0; i <= 16; i++) |
318 | if (s->ht[i]) | 317 | if (s->ht[i]) |
319 | return 0; | 318 | return 0; |
320 | 319 | ||
321 | /* OK, session has no flows */ | 320 | /* OK, session has no flows */ |
322 | for (sp = &((struct rsvp_head*)tp->root)->ht[h&0xFF]; | 321 | for (sp = &((struct rsvp_head *)tp->root)->ht[h & 0xFF]; |
323 | *sp; sp = &(*sp)->next) { | 322 | *sp; sp = &(*sp)->next) { |
324 | if (*sp == s) { | 323 | if (*sp == s) { |
325 | tcf_tree_lock(tp); | 324 | tcf_tree_lock(tp); |
@@ -337,13 +336,14 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg) | |||
337 | return 0; | 336 | return 0; |
338 | } | 337 | } |
339 | 338 | ||
340 | static unsigned gen_handle(struct tcf_proto *tp, unsigned salt) | 339 | static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt) |
341 | { | 340 | { |
342 | struct rsvp_head *data = tp->root; | 341 | struct rsvp_head *data = tp->root; |
343 | int i = 0xFFFF; | 342 | int i = 0xFFFF; |
344 | 343 | ||
345 | while (i-- > 0) { | 344 | while (i-- > 0) { |
346 | u32 h; | 345 | u32 h; |
346 | |||
347 | if ((data->hgenerator += 0x10000) == 0) | 347 | if ((data->hgenerator += 0x10000) == 0) |
348 | data->hgenerator = 0x10000; | 348 | data->hgenerator = 0x10000; |
349 | h = data->hgenerator|salt; | 349 | h = data->hgenerator|salt; |
@@ -355,10 +355,10 @@ static unsigned gen_handle(struct tcf_proto *tp, unsigned salt) | |||
355 | 355 | ||
356 | static int tunnel_bts(struct rsvp_head *data) | 356 | static int tunnel_bts(struct rsvp_head *data) |
357 | { | 357 | { |
358 | int n = data->tgenerator>>5; | 358 | int n = data->tgenerator >> 5; |
359 | u32 b = 1<<(data->tgenerator&0x1F); | 359 | u32 b = 1 << (data->tgenerator & 0x1F); |
360 | 360 | ||
361 | if (data->tmap[n]&b) | 361 | if (data->tmap[n] & b) |
362 | return 0; | 362 | return 0; |
363 | data->tmap[n] |= b; | 363 | data->tmap[n] |= b; |
364 | return 1; | 364 | return 1; |
@@ -372,10 +372,10 @@ static void tunnel_recycle(struct rsvp_head *data) | |||
372 | 372 | ||
373 | memset(tmap, 0, sizeof(tmap)); | 373 | memset(tmap, 0, sizeof(tmap)); |
374 | 374 | ||
375 | for (h1=0; h1<256; h1++) { | 375 | for (h1 = 0; h1 < 256; h1++) { |
376 | struct rsvp_session *s; | 376 | struct rsvp_session *s; |
377 | for (s = sht[h1]; s; s = s->next) { | 377 | for (s = sht[h1]; s; s = s->next) { |
378 | for (h2=0; h2<=16; h2++) { | 378 | for (h2 = 0; h2 <= 16; h2++) { |
379 | struct rsvp_filter *f; | 379 | struct rsvp_filter *f; |
380 | 380 | ||
381 | for (f = s->ht[h2]; f; f = f->next) { | 381 | for (f = s->ht[h2]; f; f = f->next) { |
@@ -395,8 +395,8 @@ static u32 gen_tunnel(struct rsvp_head *data) | |||
395 | { | 395 | { |
396 | int i, k; | 396 | int i, k; |
397 | 397 | ||
398 | for (k=0; k<2; k++) { | 398 | for (k = 0; k < 2; k++) { |
399 | for (i=255; i>0; i--) { | 399 | for (i = 255; i > 0; i--) { |
400 | if (++data->tgenerator == 0) | 400 | if (++data->tgenerator == 0) |
401 | data->tgenerator = 1; | 401 | data->tgenerator = 1; |
402 | if (tunnel_bts(data)) | 402 | if (tunnel_bts(data)) |
@@ -428,7 +428,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base, | |||
428 | struct nlattr *opt = tca[TCA_OPTIONS-1]; | 428 | struct nlattr *opt = tca[TCA_OPTIONS-1]; |
429 | struct nlattr *tb[TCA_RSVP_MAX + 1]; | 429 | struct nlattr *tb[TCA_RSVP_MAX + 1]; |
430 | struct tcf_exts e; | 430 | struct tcf_exts e; |
431 | unsigned h1, h2; | 431 | unsigned int h1, h2; |
432 | __be32 *dst; | 432 | __be32 *dst; |
433 | int err; | 433 | int err; |
434 | 434 | ||
@@ -443,7 +443,8 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base, | |||
443 | if (err < 0) | 443 | if (err < 0) |
444 | return err; | 444 | return err; |
445 | 445 | ||
446 | if ((f = (struct rsvp_filter*)*arg) != NULL) { | 446 | f = (struct rsvp_filter *)*arg; |
447 | if (f) { | ||
447 | /* Node exists: adjust only classid */ | 448 | /* Node exists: adjust only classid */ |
448 | 449 | ||
449 | if (f->handle != handle && handle) | 450 | if (f->handle != handle && handle) |
@@ -500,7 +501,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base, | |||
500 | goto errout; | 501 | goto errout; |
501 | } | 502 | } |
502 | 503 | ||
503 | for (sp = &data->ht[h1]; (s=*sp) != NULL; sp = &s->next) { | 504 | for (sp = &data->ht[h1]; (s = *sp) != NULL; sp = &s->next) { |
504 | if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] && | 505 | if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] && |
505 | pinfo && pinfo->protocol == s->protocol && | 506 | pinfo && pinfo->protocol == s->protocol && |
506 | memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 && | 507 | memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 && |
@@ -523,7 +524,7 @@ insert: | |||
523 | tcf_exts_change(tp, &f->exts, &e); | 524 | tcf_exts_change(tp, &f->exts, &e); |
524 | 525 | ||
525 | for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next) | 526 | for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next) |
526 | if (((*fp)->spi.mask&f->spi.mask) != f->spi.mask) | 527 | if (((*fp)->spi.mask & f->spi.mask) != f->spi.mask) |
527 | break; | 528 | break; |
528 | f->next = *fp; | 529 | f->next = *fp; |
529 | wmb(); | 530 | wmb(); |
@@ -567,7 +568,7 @@ errout2: | |||
567 | static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg) | 568 | static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg) |
568 | { | 569 | { |
569 | struct rsvp_head *head = tp->root; | 570 | struct rsvp_head *head = tp->root; |
570 | unsigned h, h1; | 571 | unsigned int h, h1; |
571 | 572 | ||
572 | if (arg->stop) | 573 | if (arg->stop) |
573 | return; | 574 | return; |
@@ -598,7 +599,7 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg) | |||
598 | static int rsvp_dump(struct tcf_proto *tp, unsigned long fh, | 599 | static int rsvp_dump(struct tcf_proto *tp, unsigned long fh, |
599 | struct sk_buff *skb, struct tcmsg *t) | 600 | struct sk_buff *skb, struct tcmsg *t) |
600 | { | 601 | { |
601 | struct rsvp_filter *f = (struct rsvp_filter*)fh; | 602 | struct rsvp_filter *f = (struct rsvp_filter *)fh; |
602 | struct rsvp_session *s; | 603 | struct rsvp_session *s; |
603 | unsigned char *b = skb_tail_pointer(skb); | 604 | unsigned char *b = skb_tail_pointer(skb); |
604 | struct nlattr *nest; | 605 | struct nlattr *nest; |
@@ -624,7 +625,7 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh, | |||
624 | NLA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo); | 625 | NLA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo); |
625 | if (f->res.classid) | 626 | if (f->res.classid) |
626 | NLA_PUT_U32(skb, TCA_RSVP_CLASSID, f->res.classid); | 627 | NLA_PUT_U32(skb, TCA_RSVP_CLASSID, f->res.classid); |
627 | if (((f->handle>>8)&0xFF) != 16) | 628 | if (((f->handle >> 8) & 0xFF) != 16) |
628 | NLA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src); | 629 | NLA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src); |
629 | 630 | ||
630 | if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0) | 631 | if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0) |
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 20ef330bb918..36667fa64237 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c | |||
@@ -249,7 +249,7 @@ tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle, | |||
249 | * of the hashing index is below the threshold. | 249 | * of the hashing index is below the threshold. |
250 | */ | 250 | */ |
251 | if ((cp.mask >> cp.shift) < PERFECT_HASH_THRESHOLD) | 251 | if ((cp.mask >> cp.shift) < PERFECT_HASH_THRESHOLD) |
252 | cp.hash = (cp.mask >> cp.shift)+1; | 252 | cp.hash = (cp.mask >> cp.shift) + 1; |
253 | else | 253 | else |
254 | cp.hash = DEFAULT_HASH_SIZE; | 254 | cp.hash = DEFAULT_HASH_SIZE; |
255 | } | 255 | } |
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index b0c2a82178af..966920c14e7a 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c | |||
@@ -42,8 +42,7 @@ | |||
42 | #include <net/act_api.h> | 42 | #include <net/act_api.h> |
43 | #include <net/pkt_cls.h> | 43 | #include <net/pkt_cls.h> |
44 | 44 | ||
45 | struct tc_u_knode | 45 | struct tc_u_knode { |
46 | { | ||
47 | struct tc_u_knode *next; | 46 | struct tc_u_knode *next; |
48 | u32 handle; | 47 | u32 handle; |
49 | struct tc_u_hnode *ht_up; | 48 | struct tc_u_hnode *ht_up; |
@@ -63,19 +62,17 @@ struct tc_u_knode | |||
63 | struct tc_u32_sel sel; | 62 | struct tc_u32_sel sel; |
64 | }; | 63 | }; |
65 | 64 | ||
66 | struct tc_u_hnode | 65 | struct tc_u_hnode { |
67 | { | ||
68 | struct tc_u_hnode *next; | 66 | struct tc_u_hnode *next; |
69 | u32 handle; | 67 | u32 handle; |
70 | u32 prio; | 68 | u32 prio; |
71 | struct tc_u_common *tp_c; | 69 | struct tc_u_common *tp_c; |
72 | int refcnt; | 70 | int refcnt; |
73 | unsigned divisor; | 71 | unsigned int divisor; |
74 | struct tc_u_knode *ht[1]; | 72 | struct tc_u_knode *ht[1]; |
75 | }; | 73 | }; |
76 | 74 | ||
77 | struct tc_u_common | 75 | struct tc_u_common { |
78 | { | ||
79 | struct tc_u_hnode *hlist; | 76 | struct tc_u_hnode *hlist; |
80 | struct Qdisc *q; | 77 | struct Qdisc *q; |
81 | int refcnt; | 78 | int refcnt; |
@@ -87,9 +84,11 @@ static const struct tcf_ext_map u32_ext_map = { | |||
87 | .police = TCA_U32_POLICE | 84 | .police = TCA_U32_POLICE |
88 | }; | 85 | }; |
89 | 86 | ||
90 | static __inline__ unsigned u32_hash_fold(__be32 key, struct tc_u32_sel *sel, u8 fshift) | 87 | static inline unsigned int u32_hash_fold(__be32 key, |
88 | const struct tc_u32_sel *sel, | ||
89 | u8 fshift) | ||
91 | { | 90 | { |
92 | unsigned h = ntohl(key & sel->hmask)>>fshift; | 91 | unsigned int h = ntohl(key & sel->hmask) >> fshift; |
93 | 92 | ||
94 | return h; | 93 | return h; |
95 | } | 94 | } |
@@ -101,7 +100,7 @@ static int u32_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_re | |||
101 | unsigned int off; | 100 | unsigned int off; |
102 | } stack[TC_U32_MAXDEPTH]; | 101 | } stack[TC_U32_MAXDEPTH]; |
103 | 102 | ||
104 | struct tc_u_hnode *ht = (struct tc_u_hnode*)tp->root; | 103 | struct tc_u_hnode *ht = (struct tc_u_hnode *)tp->root; |
105 | unsigned int off = skb_network_offset(skb); | 104 | unsigned int off = skb_network_offset(skb); |
106 | struct tc_u_knode *n; | 105 | struct tc_u_knode *n; |
107 | int sdepth = 0; | 106 | int sdepth = 0; |
@@ -120,7 +119,7 @@ next_knode: | |||
120 | struct tc_u32_key *key = n->sel.keys; | 119 | struct tc_u32_key *key = n->sel.keys; |
121 | 120 | ||
122 | #ifdef CONFIG_CLS_U32_PERF | 121 | #ifdef CONFIG_CLS_U32_PERF |
123 | n->pf->rcnt +=1; | 122 | n->pf->rcnt += 1; |
124 | j = 0; | 123 | j = 0; |
125 | #endif | 124 | #endif |
126 | 125 | ||
@@ -133,7 +132,7 @@ next_knode: | |||
133 | } | 132 | } |
134 | #endif | 133 | #endif |
135 | 134 | ||
136 | for (i = n->sel.nkeys; i>0; i--, key++) { | 135 | for (i = n->sel.nkeys; i > 0; i--, key++) { |
137 | int toff = off + key->off + (off2 & key->offmask); | 136 | int toff = off + key->off + (off2 & key->offmask); |
138 | __be32 *data, _data; | 137 | __be32 *data, _data; |
139 | 138 | ||
@@ -148,13 +147,13 @@ next_knode: | |||
148 | goto next_knode; | 147 | goto next_knode; |
149 | } | 148 | } |
150 | #ifdef CONFIG_CLS_U32_PERF | 149 | #ifdef CONFIG_CLS_U32_PERF |
151 | n->pf->kcnts[j] +=1; | 150 | n->pf->kcnts[j] += 1; |
152 | j++; | 151 | j++; |
153 | #endif | 152 | #endif |
154 | } | 153 | } |
155 | if (n->ht_down == NULL) { | 154 | if (n->ht_down == NULL) { |
156 | check_terminal: | 155 | check_terminal: |
157 | if (n->sel.flags&TC_U32_TERMINAL) { | 156 | if (n->sel.flags & TC_U32_TERMINAL) { |
158 | 157 | ||
159 | *res = n->res; | 158 | *res = n->res; |
160 | #ifdef CONFIG_NET_CLS_IND | 159 | #ifdef CONFIG_NET_CLS_IND |
@@ -164,7 +163,7 @@ check_terminal: | |||
164 | } | 163 | } |
165 | #endif | 164 | #endif |
166 | #ifdef CONFIG_CLS_U32_PERF | 165 | #ifdef CONFIG_CLS_U32_PERF |
167 | n->pf->rhit +=1; | 166 | n->pf->rhit += 1; |
168 | #endif | 167 | #endif |
169 | r = tcf_exts_exec(skb, &n->exts, res); | 168 | r = tcf_exts_exec(skb, &n->exts, res); |
170 | if (r < 0) { | 169 | if (r < 0) { |
@@ -197,10 +196,10 @@ check_terminal: | |||
197 | sel = ht->divisor & u32_hash_fold(*data, &n->sel, | 196 | sel = ht->divisor & u32_hash_fold(*data, &n->sel, |
198 | n->fshift); | 197 | n->fshift); |
199 | } | 198 | } |
200 | if (!(n->sel.flags&(TC_U32_VAROFFSET|TC_U32_OFFSET|TC_U32_EAT))) | 199 | if (!(n->sel.flags & (TC_U32_VAROFFSET | TC_U32_OFFSET | TC_U32_EAT))) |
201 | goto next_ht; | 200 | goto next_ht; |
202 | 201 | ||
203 | if (n->sel.flags&(TC_U32_OFFSET|TC_U32_VAROFFSET)) { | 202 | if (n->sel.flags & (TC_U32_OFFSET | TC_U32_VAROFFSET)) { |
204 | off2 = n->sel.off + 3; | 203 | off2 = n->sel.off + 3; |
205 | if (n->sel.flags & TC_U32_VAROFFSET) { | 204 | if (n->sel.flags & TC_U32_VAROFFSET) { |
206 | __be16 *data, _data; | 205 | __be16 *data, _data; |
@@ -215,7 +214,7 @@ check_terminal: | |||
215 | } | 214 | } |
216 | off2 &= ~3; | 215 | off2 &= ~3; |
217 | } | 216 | } |
218 | if (n->sel.flags&TC_U32_EAT) { | 217 | if (n->sel.flags & TC_U32_EAT) { |
219 | off += off2; | 218 | off += off2; |
220 | off2 = 0; | 219 | off2 = 0; |
221 | } | 220 | } |
@@ -236,11 +235,11 @@ out: | |||
236 | 235 | ||
237 | deadloop: | 236 | deadloop: |
238 | if (net_ratelimit()) | 237 | if (net_ratelimit()) |
239 | printk(KERN_WARNING "cls_u32: dead loop\n"); | 238 | pr_warning("cls_u32: dead loop\n"); |
240 | return -1; | 239 | return -1; |
241 | } | 240 | } |
242 | 241 | ||
243 | static __inline__ struct tc_u_hnode * | 242 | static struct tc_u_hnode * |
244 | u32_lookup_ht(struct tc_u_common *tp_c, u32 handle) | 243 | u32_lookup_ht(struct tc_u_common *tp_c, u32 handle) |
245 | { | 244 | { |
246 | struct tc_u_hnode *ht; | 245 | struct tc_u_hnode *ht; |
@@ -252,10 +251,10 @@ u32_lookup_ht(struct tc_u_common *tp_c, u32 handle) | |||
252 | return ht; | 251 | return ht; |
253 | } | 252 | } |
254 | 253 | ||
255 | static __inline__ struct tc_u_knode * | 254 | static struct tc_u_knode * |
256 | u32_lookup_key(struct tc_u_hnode *ht, u32 handle) | 255 | u32_lookup_key(struct tc_u_hnode *ht, u32 handle) |
257 | { | 256 | { |
258 | unsigned sel; | 257 | unsigned int sel; |
259 | struct tc_u_knode *n = NULL; | 258 | struct tc_u_knode *n = NULL; |
260 | 259 | ||
261 | sel = TC_U32_HASH(handle); | 260 | sel = TC_U32_HASH(handle); |
@@ -300,7 +299,7 @@ static u32 gen_new_htid(struct tc_u_common *tp_c) | |||
300 | do { | 299 | do { |
301 | if (++tp_c->hgenerator == 0x7FF) | 300 | if (++tp_c->hgenerator == 0x7FF) |
302 | tp_c->hgenerator = 1; | 301 | tp_c->hgenerator = 1; |
303 | } while (--i>0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20)); | 302 | } while (--i > 0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20)); |
304 | 303 | ||
305 | return i > 0 ? (tp_c->hgenerator|0x800)<<20 : 0; | 304 | return i > 0 ? (tp_c->hgenerator|0x800)<<20 : 0; |
306 | } | 305 | } |
@@ -378,9 +377,9 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode* key) | |||
378 | static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) | 377 | static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) |
379 | { | 378 | { |
380 | struct tc_u_knode *n; | 379 | struct tc_u_knode *n; |
381 | unsigned h; | 380 | unsigned int h; |
382 | 381 | ||
383 | for (h=0; h<=ht->divisor; h++) { | 382 | for (h = 0; h <= ht->divisor; h++) { |
384 | while ((n = ht->ht[h]) != NULL) { | 383 | while ((n = ht->ht[h]) != NULL) { |
385 | ht->ht[h] = n->next; | 384 | ht->ht[h] = n->next; |
386 | 385 | ||
@@ -446,13 +445,13 @@ static void u32_destroy(struct tcf_proto *tp) | |||
446 | 445 | ||
447 | static int u32_delete(struct tcf_proto *tp, unsigned long arg) | 446 | static int u32_delete(struct tcf_proto *tp, unsigned long arg) |
448 | { | 447 | { |
449 | struct tc_u_hnode *ht = (struct tc_u_hnode*)arg; | 448 | struct tc_u_hnode *ht = (struct tc_u_hnode *)arg; |
450 | 449 | ||
451 | if (ht == NULL) | 450 | if (ht == NULL) |
452 | return 0; | 451 | return 0; |
453 | 452 | ||
454 | if (TC_U32_KEY(ht->handle)) | 453 | if (TC_U32_KEY(ht->handle)) |
455 | return u32_delete_key(tp, (struct tc_u_knode*)ht); | 454 | return u32_delete_key(tp, (struct tc_u_knode *)ht); |
456 | 455 | ||
457 | if (tp->root == ht) | 456 | if (tp->root == ht) |
458 | return -EINVAL; | 457 | return -EINVAL; |
@@ -470,14 +469,14 @@ static int u32_delete(struct tcf_proto *tp, unsigned long arg) | |||
470 | static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle) | 469 | static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle) |
471 | { | 470 | { |
472 | struct tc_u_knode *n; | 471 | struct tc_u_knode *n; |
473 | unsigned i = 0x7FF; | 472 | unsigned int i = 0x7FF; |
474 | 473 | ||
475 | for (n=ht->ht[TC_U32_HASH(handle)]; n; n = n->next) | 474 | for (n = ht->ht[TC_U32_HASH(handle)]; n; n = n->next) |
476 | if (i < TC_U32_NODE(n->handle)) | 475 | if (i < TC_U32_NODE(n->handle)) |
477 | i = TC_U32_NODE(n->handle); | 476 | i = TC_U32_NODE(n->handle); |
478 | i++; | 477 | i++; |
479 | 478 | ||
480 | return handle|(i>0xFFF ? 0xFFF : i); | 479 | return handle | (i > 0xFFF ? 0xFFF : i); |
481 | } | 480 | } |
482 | 481 | ||
483 | static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = { | 482 | static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = { |
@@ -566,7 +565,8 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle, | |||
566 | if (err < 0) | 565 | if (err < 0) |
567 | return err; | 566 | return err; |
568 | 567 | ||
569 | if ((n = (struct tc_u_knode*)*arg) != NULL) { | 568 | n = (struct tc_u_knode *)*arg; |
569 | if (n) { | ||
570 | if (TC_U32_KEY(n->handle) == 0) | 570 | if (TC_U32_KEY(n->handle) == 0) |
571 | return -EINVAL; | 571 | return -EINVAL; |
572 | 572 | ||
@@ -574,7 +574,7 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle, | |||
574 | } | 574 | } |
575 | 575 | ||
576 | if (tb[TCA_U32_DIVISOR]) { | 576 | if (tb[TCA_U32_DIVISOR]) { |
577 | unsigned divisor = nla_get_u32(tb[TCA_U32_DIVISOR]); | 577 | unsigned int divisor = nla_get_u32(tb[TCA_U32_DIVISOR]); |
578 | 578 | ||
579 | if (--divisor > 0x100) | 579 | if (--divisor > 0x100) |
580 | return -EINVAL; | 580 | return -EINVAL; |
@@ -585,7 +585,7 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle, | |||
585 | if (handle == 0) | 585 | if (handle == 0) |
586 | return -ENOMEM; | 586 | return -ENOMEM; |
587 | } | 587 | } |
588 | ht = kzalloc(sizeof(*ht) + divisor*sizeof(void*), GFP_KERNEL); | 588 | ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL); |
589 | if (ht == NULL) | 589 | if (ht == NULL) |
590 | return -ENOBUFS; | 590 | return -ENOBUFS; |
591 | ht->tp_c = tp_c; | 591 | ht->tp_c = tp_c; |
@@ -683,7 +683,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg) | |||
683 | struct tc_u_common *tp_c = tp->data; | 683 | struct tc_u_common *tp_c = tp->data; |
684 | struct tc_u_hnode *ht; | 684 | struct tc_u_hnode *ht; |
685 | struct tc_u_knode *n; | 685 | struct tc_u_knode *n; |
686 | unsigned h; | 686 | unsigned int h; |
687 | 687 | ||
688 | if (arg->stop) | 688 | if (arg->stop) |
689 | return; | 689 | return; |
@@ -717,7 +717,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg) | |||
717 | static int u32_dump(struct tcf_proto *tp, unsigned long fh, | 717 | static int u32_dump(struct tcf_proto *tp, unsigned long fh, |
718 | struct sk_buff *skb, struct tcmsg *t) | 718 | struct sk_buff *skb, struct tcmsg *t) |
719 | { | 719 | { |
720 | struct tc_u_knode *n = (struct tc_u_knode*)fh; | 720 | struct tc_u_knode *n = (struct tc_u_knode *)fh; |
721 | struct nlattr *nest; | 721 | struct nlattr *nest; |
722 | 722 | ||
723 | if (n == NULL) | 723 | if (n == NULL) |
@@ -730,8 +730,9 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh, | |||
730 | goto nla_put_failure; | 730 | goto nla_put_failure; |
731 | 731 | ||
732 | if (TC_U32_KEY(n->handle) == 0) { | 732 | if (TC_U32_KEY(n->handle) == 0) { |
733 | struct tc_u_hnode *ht = (struct tc_u_hnode*)fh; | 733 | struct tc_u_hnode *ht = (struct tc_u_hnode *)fh; |
734 | u32 divisor = ht->divisor+1; | 734 | u32 divisor = ht->divisor + 1; |
735 | |||
735 | NLA_PUT_U32(skb, TCA_U32_DIVISOR, divisor); | 736 | NLA_PUT_U32(skb, TCA_U32_DIVISOR, divisor); |
736 | } else { | 737 | } else { |
737 | NLA_PUT(skb, TCA_U32_SEL, | 738 | NLA_PUT(skb, TCA_U32_SEL, |
@@ -755,7 +756,7 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh, | |||
755 | goto nla_put_failure; | 756 | goto nla_put_failure; |
756 | 757 | ||
757 | #ifdef CONFIG_NET_CLS_IND | 758 | #ifdef CONFIG_NET_CLS_IND |
758 | if(strlen(n->indev)) | 759 | if (strlen(n->indev)) |
759 | NLA_PUT_STRING(skb, TCA_U32_INDEV, n->indev); | 760 | NLA_PUT_STRING(skb, TCA_U32_INDEV, n->indev); |
760 | #endif | 761 | #endif |
761 | #ifdef CONFIG_CLS_U32_PERF | 762 | #ifdef CONFIG_CLS_U32_PERF |
diff --git a/net/sched/em_cmp.c b/net/sched/em_cmp.c index bc450397487a..1c8360a2752a 100644 --- a/net/sched/em_cmp.c +++ b/net/sched/em_cmp.c | |||
@@ -33,40 +33,41 @@ static int em_cmp_match(struct sk_buff *skb, struct tcf_ematch *em, | |||
33 | return 0; | 33 | return 0; |
34 | 34 | ||
35 | switch (cmp->align) { | 35 | switch (cmp->align) { |
36 | case TCF_EM_ALIGN_U8: | 36 | case TCF_EM_ALIGN_U8: |
37 | val = *ptr; | 37 | val = *ptr; |
38 | break; | 38 | break; |
39 | 39 | ||
40 | case TCF_EM_ALIGN_U16: | 40 | case TCF_EM_ALIGN_U16: |
41 | val = get_unaligned_be16(ptr); | 41 | val = get_unaligned_be16(ptr); |
42 | 42 | ||
43 | if (cmp_needs_transformation(cmp)) | 43 | if (cmp_needs_transformation(cmp)) |
44 | val = be16_to_cpu(val); | 44 | val = be16_to_cpu(val); |
45 | break; | 45 | break; |
46 | 46 | ||
47 | case TCF_EM_ALIGN_U32: | 47 | case TCF_EM_ALIGN_U32: |
48 | /* Worth checking boundries? The branching seems | 48 | /* Worth checking boundries? The branching seems |
49 | * to get worse. Visit again. */ | 49 | * to get worse. Visit again. |
50 | val = get_unaligned_be32(ptr); | 50 | */ |
51 | val = get_unaligned_be32(ptr); | ||
51 | 52 | ||
52 | if (cmp_needs_transformation(cmp)) | 53 | if (cmp_needs_transformation(cmp)) |
53 | val = be32_to_cpu(val); | 54 | val = be32_to_cpu(val); |
54 | break; | 55 | break; |
55 | 56 | ||
56 | default: | 57 | default: |
57 | return 0; | 58 | return 0; |
58 | } | 59 | } |
59 | 60 | ||
60 | if (cmp->mask) | 61 | if (cmp->mask) |
61 | val &= cmp->mask; | 62 | val &= cmp->mask; |
62 | 63 | ||
63 | switch (cmp->opnd) { | 64 | switch (cmp->opnd) { |
64 | case TCF_EM_OPND_EQ: | 65 | case TCF_EM_OPND_EQ: |
65 | return val == cmp->val; | 66 | return val == cmp->val; |
66 | case TCF_EM_OPND_LT: | 67 | case TCF_EM_OPND_LT: |
67 | return val < cmp->val; | 68 | return val < cmp->val; |
68 | case TCF_EM_OPND_GT: | 69 | case TCF_EM_OPND_GT: |
69 | return val > cmp->val; | 70 | return val > cmp->val; |
70 | } | 71 | } |
71 | 72 | ||
72 | return 0; | 73 | return 0; |
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 34da5e29ea1a..a889d099320f 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c | |||
@@ -73,21 +73,18 @@ | |||
73 | #include <net/pkt_cls.h> | 73 | #include <net/pkt_cls.h> |
74 | #include <net/sock.h> | 74 | #include <net/sock.h> |
75 | 75 | ||
76 | struct meta_obj | 76 | struct meta_obj { |
77 | { | ||
78 | unsigned long value; | 77 | unsigned long value; |
79 | unsigned int len; | 78 | unsigned int len; |
80 | }; | 79 | }; |
81 | 80 | ||
82 | struct meta_value | 81 | struct meta_value { |
83 | { | ||
84 | struct tcf_meta_val hdr; | 82 | struct tcf_meta_val hdr; |
85 | unsigned long val; | 83 | unsigned long val; |
86 | unsigned int len; | 84 | unsigned int len; |
87 | }; | 85 | }; |
88 | 86 | ||
89 | struct meta_match | 87 | struct meta_match { |
90 | { | ||
91 | struct meta_value lvalue; | 88 | struct meta_value lvalue; |
92 | struct meta_value rvalue; | 89 | struct meta_value rvalue; |
93 | }; | 90 | }; |
@@ -255,7 +252,7 @@ META_COLLECTOR(int_rtclassid) | |||
255 | if (unlikely(skb_dst(skb) == NULL)) | 252 | if (unlikely(skb_dst(skb) == NULL)) |
256 | *err = -1; | 253 | *err = -1; |
257 | else | 254 | else |
258 | #ifdef CONFIG_NET_CLS_ROUTE | 255 | #ifdef CONFIG_IP_ROUTE_CLASSID |
259 | dst->value = skb_dst(skb)->tclassid; | 256 | dst->value = skb_dst(skb)->tclassid; |
260 | #else | 257 | #else |
261 | dst->value = 0; | 258 | dst->value = 0; |
@@ -483,8 +480,7 @@ META_COLLECTOR(int_sk_write_pend) | |||
483 | * Meta value collectors assignment table | 480 | * Meta value collectors assignment table |
484 | **************************************************************************/ | 481 | **************************************************************************/ |
485 | 482 | ||
486 | struct meta_ops | 483 | struct meta_ops { |
487 | { | ||
488 | void (*get)(struct sk_buff *, struct tcf_pkt_info *, | 484 | void (*get)(struct sk_buff *, struct tcf_pkt_info *, |
489 | struct meta_value *, struct meta_obj *, int *); | 485 | struct meta_value *, struct meta_obj *, int *); |
490 | }; | 486 | }; |
@@ -494,7 +490,7 @@ struct meta_ops | |||
494 | 490 | ||
495 | /* Meta value operations table listing all meta value collectors and | 491 | /* Meta value operations table listing all meta value collectors and |
496 | * assigns them to a type and meta id. */ | 492 | * assigns them to a type and meta id. */ |
497 | static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = { | 493 | static struct meta_ops __meta_ops[TCF_META_TYPE_MAX + 1][TCF_META_ID_MAX + 1] = { |
498 | [TCF_META_TYPE_VAR] = { | 494 | [TCF_META_TYPE_VAR] = { |
499 | [META_ID(DEV)] = META_FUNC(var_dev), | 495 | [META_ID(DEV)] = META_FUNC(var_dev), |
500 | [META_ID(SK_BOUND_IF)] = META_FUNC(var_sk_bound_if), | 496 | [META_ID(SK_BOUND_IF)] = META_FUNC(var_sk_bound_if), |
@@ -550,7 +546,7 @@ static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = { | |||
550 | } | 546 | } |
551 | }; | 547 | }; |
552 | 548 | ||
553 | static inline struct meta_ops * meta_ops(struct meta_value *val) | 549 | static inline struct meta_ops *meta_ops(struct meta_value *val) |
554 | { | 550 | { |
555 | return &__meta_ops[meta_type(val)][meta_id(val)]; | 551 | return &__meta_ops[meta_type(val)][meta_id(val)]; |
556 | } | 552 | } |
@@ -649,9 +645,8 @@ static int meta_int_dump(struct sk_buff *skb, struct meta_value *v, int tlv) | |||
649 | { | 645 | { |
650 | if (v->len == sizeof(unsigned long)) | 646 | if (v->len == sizeof(unsigned long)) |
651 | NLA_PUT(skb, tlv, sizeof(unsigned long), &v->val); | 647 | NLA_PUT(skb, tlv, sizeof(unsigned long), &v->val); |
652 | else if (v->len == sizeof(u32)) { | 648 | else if (v->len == sizeof(u32)) |
653 | NLA_PUT_U32(skb, tlv, v->val); | 649 | NLA_PUT_U32(skb, tlv, v->val); |
654 | } | ||
655 | 650 | ||
656 | return 0; | 651 | return 0; |
657 | 652 | ||
@@ -663,8 +658,7 @@ nla_put_failure: | |||
663 | * Type specific operations table | 658 | * Type specific operations table |
664 | **************************************************************************/ | 659 | **************************************************************************/ |
665 | 660 | ||
666 | struct meta_type_ops | 661 | struct meta_type_ops { |
667 | { | ||
668 | void (*destroy)(struct meta_value *); | 662 | void (*destroy)(struct meta_value *); |
669 | int (*compare)(struct meta_obj *, struct meta_obj *); | 663 | int (*compare)(struct meta_obj *, struct meta_obj *); |
670 | int (*change)(struct meta_value *, struct nlattr *); | 664 | int (*change)(struct meta_value *, struct nlattr *); |
@@ -672,7 +666,7 @@ struct meta_type_ops | |||
672 | int (*dump)(struct sk_buff *, struct meta_value *, int); | 666 | int (*dump)(struct sk_buff *, struct meta_value *, int); |
673 | }; | 667 | }; |
674 | 668 | ||
675 | static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX+1] = { | 669 | static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX + 1] = { |
676 | [TCF_META_TYPE_VAR] = { | 670 | [TCF_META_TYPE_VAR] = { |
677 | .destroy = meta_var_destroy, | 671 | .destroy = meta_var_destroy, |
678 | .compare = meta_var_compare, | 672 | .compare = meta_var_compare, |
@@ -688,7 +682,7 @@ static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX+1] = { | |||
688 | } | 682 | } |
689 | }; | 683 | }; |
690 | 684 | ||
691 | static inline struct meta_type_ops * meta_type_ops(struct meta_value *v) | 685 | static inline struct meta_type_ops *meta_type_ops(struct meta_value *v) |
692 | { | 686 | { |
693 | return &__meta_type_ops[meta_type(v)]; | 687 | return &__meta_type_ops[meta_type(v)]; |
694 | } | 688 | } |
@@ -713,7 +707,7 @@ static int meta_get(struct sk_buff *skb, struct tcf_pkt_info *info, | |||
713 | return err; | 707 | return err; |
714 | 708 | ||
715 | if (meta_type_ops(v)->apply_extras) | 709 | if (meta_type_ops(v)->apply_extras) |
716 | meta_type_ops(v)->apply_extras(v, dst); | 710 | meta_type_ops(v)->apply_extras(v, dst); |
717 | 711 | ||
718 | return 0; | 712 | return 0; |
719 | } | 713 | } |
@@ -732,12 +726,12 @@ static int em_meta_match(struct sk_buff *skb, struct tcf_ematch *m, | |||
732 | r = meta_type_ops(&meta->lvalue)->compare(&l_value, &r_value); | 726 | r = meta_type_ops(&meta->lvalue)->compare(&l_value, &r_value); |
733 | 727 | ||
734 | switch (meta->lvalue.hdr.op) { | 728 | switch (meta->lvalue.hdr.op) { |
735 | case TCF_EM_OPND_EQ: | 729 | case TCF_EM_OPND_EQ: |
736 | return !r; | 730 | return !r; |
737 | case TCF_EM_OPND_LT: | 731 | case TCF_EM_OPND_LT: |
738 | return r < 0; | 732 | return r < 0; |
739 | case TCF_EM_OPND_GT: | 733 | case TCF_EM_OPND_GT: |
740 | return r > 0; | 734 | return r > 0; |
741 | } | 735 | } |
742 | 736 | ||
743 | return 0; | 737 | return 0; |
@@ -771,7 +765,7 @@ static inline int meta_change_data(struct meta_value *dst, struct nlattr *nla) | |||
771 | 765 | ||
772 | static inline int meta_is_supported(struct meta_value *val) | 766 | static inline int meta_is_supported(struct meta_value *val) |
773 | { | 767 | { |
774 | return (!meta_id(val) || meta_ops(val)->get); | 768 | return !meta_id(val) || meta_ops(val)->get; |
775 | } | 769 | } |
776 | 770 | ||
777 | static const struct nla_policy meta_policy[TCA_EM_META_MAX + 1] = { | 771 | static const struct nla_policy meta_policy[TCA_EM_META_MAX + 1] = { |
diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c index 1a4176aee6e5..a3bed07a008b 100644 --- a/net/sched/em_nbyte.c +++ b/net/sched/em_nbyte.c | |||
@@ -18,8 +18,7 @@ | |||
18 | #include <linux/tc_ematch/tc_em_nbyte.h> | 18 | #include <linux/tc_ematch/tc_em_nbyte.h> |
19 | #include <net/pkt_cls.h> | 19 | #include <net/pkt_cls.h> |
20 | 20 | ||
21 | struct nbyte_data | 21 | struct nbyte_data { |
22 | { | ||
23 | struct tcf_em_nbyte hdr; | 22 | struct tcf_em_nbyte hdr; |
24 | char pattern[0]; | 23 | char pattern[0]; |
25 | }; | 24 | }; |
diff --git a/net/sched/em_text.c b/net/sched/em_text.c index ea8f566e720c..15d353d2e4be 100644 --- a/net/sched/em_text.c +++ b/net/sched/em_text.c | |||
@@ -19,8 +19,7 @@ | |||
19 | #include <linux/tc_ematch/tc_em_text.h> | 19 | #include <linux/tc_ematch/tc_em_text.h> |
20 | #include <net/pkt_cls.h> | 20 | #include <net/pkt_cls.h> |
21 | 21 | ||
22 | struct text_match | 22 | struct text_match { |
23 | { | ||
24 | u16 from_offset; | 23 | u16 from_offset; |
25 | u16 to_offset; | 24 | u16 to_offset; |
26 | u8 from_layer; | 25 | u8 from_layer; |
diff --git a/net/sched/em_u32.c b/net/sched/em_u32.c index 953f1479f7da..797bdb88c010 100644 --- a/net/sched/em_u32.c +++ b/net/sched/em_u32.c | |||
@@ -35,7 +35,7 @@ static int em_u32_match(struct sk_buff *skb, struct tcf_ematch *em, | |||
35 | if (!tcf_valid_offset(skb, ptr, sizeof(u32))) | 35 | if (!tcf_valid_offset(skb, ptr, sizeof(u32))) |
36 | return 0; | 36 | return 0; |
37 | 37 | ||
38 | return !(((*(__be32*) ptr) ^ key->val) & key->mask); | 38 | return !(((*(__be32 *) ptr) ^ key->val) & key->mask); |
39 | } | 39 | } |
40 | 40 | ||
41 | static struct tcf_ematch_ops em_u32_ops = { | 41 | static struct tcf_ematch_ops em_u32_ops = { |
diff --git a/net/sched/ematch.c b/net/sched/ematch.c index 5e37da961f80..88d93eb92507 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c | |||
@@ -93,7 +93,7 @@ | |||
93 | static LIST_HEAD(ematch_ops); | 93 | static LIST_HEAD(ematch_ops); |
94 | static DEFINE_RWLOCK(ematch_mod_lock); | 94 | static DEFINE_RWLOCK(ematch_mod_lock); |
95 | 95 | ||
96 | static inline struct tcf_ematch_ops * tcf_em_lookup(u16 kind) | 96 | static struct tcf_ematch_ops *tcf_em_lookup(u16 kind) |
97 | { | 97 | { |
98 | struct tcf_ematch_ops *e = NULL; | 98 | struct tcf_ematch_ops *e = NULL; |
99 | 99 | ||
@@ -163,8 +163,8 @@ void tcf_em_unregister(struct tcf_ematch_ops *ops) | |||
163 | } | 163 | } |
164 | EXPORT_SYMBOL(tcf_em_unregister); | 164 | EXPORT_SYMBOL(tcf_em_unregister); |
165 | 165 | ||
166 | static inline struct tcf_ematch * tcf_em_get_match(struct tcf_ematch_tree *tree, | 166 | static inline struct tcf_ematch *tcf_em_get_match(struct tcf_ematch_tree *tree, |
167 | int index) | 167 | int index) |
168 | { | 168 | { |
169 | return &tree->matches[index]; | 169 | return &tree->matches[index]; |
170 | } | 170 | } |
@@ -184,7 +184,8 @@ static int tcf_em_validate(struct tcf_proto *tp, | |||
184 | 184 | ||
185 | if (em_hdr->kind == TCF_EM_CONTAINER) { | 185 | if (em_hdr->kind == TCF_EM_CONTAINER) { |
186 | /* Special ematch called "container", carries an index | 186 | /* Special ematch called "container", carries an index |
187 | * referencing an external ematch sequence. */ | 187 | * referencing an external ematch sequence. |
188 | */ | ||
188 | u32 ref; | 189 | u32 ref; |
189 | 190 | ||
190 | if (data_len < sizeof(ref)) | 191 | if (data_len < sizeof(ref)) |
@@ -195,7 +196,8 @@ static int tcf_em_validate(struct tcf_proto *tp, | |||
195 | goto errout; | 196 | goto errout; |
196 | 197 | ||
197 | /* We do not allow backward jumps to avoid loops and jumps | 198 | /* We do not allow backward jumps to avoid loops and jumps |
198 | * to our own position are of course illegal. */ | 199 | * to our own position are of course illegal. |
200 | */ | ||
199 | if (ref <= idx) | 201 | if (ref <= idx) |
200 | goto errout; | 202 | goto errout; |
201 | 203 | ||
@@ -208,7 +210,8 @@ static int tcf_em_validate(struct tcf_proto *tp, | |||
208 | * which automatically releases the reference again, therefore | 210 | * which automatically releases the reference again, therefore |
209 | * the module MUST not be given back under any circumstances | 211 | * the module MUST not be given back under any circumstances |
210 | * here. Be aware, the destroy function assumes that the | 212 | * here. Be aware, the destroy function assumes that the |
211 | * module is held if the ops field is non zero. */ | 213 | * module is held if the ops field is non zero. |
214 | */ | ||
212 | em->ops = tcf_em_lookup(em_hdr->kind); | 215 | em->ops = tcf_em_lookup(em_hdr->kind); |
213 | 216 | ||
214 | if (em->ops == NULL) { | 217 | if (em->ops == NULL) { |
@@ -221,7 +224,8 @@ static int tcf_em_validate(struct tcf_proto *tp, | |||
221 | if (em->ops) { | 224 | if (em->ops) { |
222 | /* We dropped the RTNL mutex in order to | 225 | /* We dropped the RTNL mutex in order to |
223 | * perform the module load. Tell the caller | 226 | * perform the module load. Tell the caller |
224 | * to replay the request. */ | 227 | * to replay the request. |
228 | */ | ||
225 | module_put(em->ops->owner); | 229 | module_put(em->ops->owner); |
226 | err = -EAGAIN; | 230 | err = -EAGAIN; |
227 | } | 231 | } |
@@ -230,7 +234,8 @@ static int tcf_em_validate(struct tcf_proto *tp, | |||
230 | } | 234 | } |
231 | 235 | ||
232 | /* ematch module provides expected length of data, so we | 236 | /* ematch module provides expected length of data, so we |
233 | * can do a basic sanity check. */ | 237 | * can do a basic sanity check. |
238 | */ | ||
234 | if (em->ops->datalen && data_len < em->ops->datalen) | 239 | if (em->ops->datalen && data_len < em->ops->datalen) |
235 | goto errout; | 240 | goto errout; |
236 | 241 | ||
@@ -246,7 +251,8 @@ static int tcf_em_validate(struct tcf_proto *tp, | |||
246 | * TCF_EM_SIMPLE may be specified stating that the | 251 | * TCF_EM_SIMPLE may be specified stating that the |
247 | * data only consists of a u32 integer and the module | 252 | * data only consists of a u32 integer and the module |
248 | * does not expected a memory reference but rather | 253 | * does not expected a memory reference but rather |
249 | * the value carried. */ | 254 | * the value carried. |
255 | */ | ||
250 | if (em_hdr->flags & TCF_EM_SIMPLE) { | 256 | if (em_hdr->flags & TCF_EM_SIMPLE) { |
251 | if (data_len < sizeof(u32)) | 257 | if (data_len < sizeof(u32)) |
252 | goto errout; | 258 | goto errout; |
@@ -334,7 +340,8 @@ int tcf_em_tree_validate(struct tcf_proto *tp, struct nlattr *nla, | |||
334 | * The array of rt attributes is parsed in the order as they are | 340 | * The array of rt attributes is parsed in the order as they are |
335 | * provided, their type must be incremental from 1 to n. Even | 341 | * provided, their type must be incremental from 1 to n. Even |
336 | * if it does not serve any real purpose, a failure of sticking | 342 | * if it does not serve any real purpose, a failure of sticking |
337 | * to this policy will result in parsing failure. */ | 343 | * to this policy will result in parsing failure. |
344 | */ | ||
338 | for (idx = 0; nla_ok(rt_match, list_len); idx++) { | 345 | for (idx = 0; nla_ok(rt_match, list_len); idx++) { |
339 | err = -EINVAL; | 346 | err = -EINVAL; |
340 | 347 | ||
@@ -359,7 +366,8 @@ int tcf_em_tree_validate(struct tcf_proto *tp, struct nlattr *nla, | |||
359 | /* Check if the number of matches provided by userspace actually | 366 | /* Check if the number of matches provided by userspace actually |
360 | * complies with the array of matches. The number was used for | 367 | * complies with the array of matches. The number was used for |
361 | * the validation of references and a mismatch could lead to | 368 | * the validation of references and a mismatch could lead to |
362 | * undefined references during the matching process. */ | 369 | * undefined references during the matching process. |
370 | */ | ||
363 | if (idx != tree_hdr->nmatches) { | 371 | if (idx != tree_hdr->nmatches) { |
364 | err = -EINVAL; | 372 | err = -EINVAL; |
365 | goto errout_abort; | 373 | goto errout_abort; |
@@ -449,7 +457,7 @@ int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv) | |||
449 | .flags = em->flags | 457 | .flags = em->flags |
450 | }; | 458 | }; |
451 | 459 | ||
452 | NLA_PUT(skb, i+1, sizeof(em_hdr), &em_hdr); | 460 | NLA_PUT(skb, i + 1, sizeof(em_hdr), &em_hdr); |
453 | 461 | ||
454 | if (em->ops && em->ops->dump) { | 462 | if (em->ops && em->ops->dump) { |
455 | if (em->ops->dump(skb, em) < 0) | 463 | if (em->ops->dump(skb, em) < 0) |
@@ -478,6 +486,7 @@ static inline int tcf_em_match(struct sk_buff *skb, struct tcf_ematch *em, | |||
478 | struct tcf_pkt_info *info) | 486 | struct tcf_pkt_info *info) |
479 | { | 487 | { |
480 | int r = em->ops->match(skb, em, info); | 488 | int r = em->ops->match(skb, em, info); |
489 | |||
481 | return tcf_em_is_inverted(em) ? !r : r; | 490 | return tcf_em_is_inverted(em) ? !r : r; |
482 | } | 491 | } |
483 | 492 | ||
@@ -527,8 +536,8 @@ pop_stack: | |||
527 | 536 | ||
528 | stack_overflow: | 537 | stack_overflow: |
529 | if (net_ratelimit()) | 538 | if (net_ratelimit()) |
530 | printk(KERN_WARNING "tc ematch: local stack overflow," | 539 | pr_warning("tc ematch: local stack overflow," |
531 | " increase NET_EMATCH_STACK\n"); | 540 | " increase NET_EMATCH_STACK\n"); |
532 | return -1; | 541 | return -1; |
533 | } | 542 | } |
534 | EXPORT_SYMBOL(__tcf_em_tree_match); | 543 | EXPORT_SYMBOL(__tcf_em_tree_match); |
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index b22ca2d1cebc..150741579408 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c | |||
@@ -187,7 +187,7 @@ int unregister_qdisc(struct Qdisc_ops *qops) | |||
187 | int err = -ENOENT; | 187 | int err = -ENOENT; |
188 | 188 | ||
189 | write_lock(&qdisc_mod_lock); | 189 | write_lock(&qdisc_mod_lock); |
190 | for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next) | 190 | for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next) |
191 | if (q == qops) | 191 | if (q == qops) |
192 | break; | 192 | break; |
193 | if (q) { | 193 | if (q) { |
@@ -321,7 +321,9 @@ void qdisc_put_rtab(struct qdisc_rate_table *tab) | |||
321 | if (!tab || --tab->refcnt) | 321 | if (!tab || --tab->refcnt) |
322 | return; | 322 | return; |
323 | 323 | ||
324 | for (rtabp = &qdisc_rtab_list; (rtab=*rtabp) != NULL; rtabp = &rtab->next) { | 324 | for (rtabp = &qdisc_rtab_list; |
325 | (rtab = *rtabp) != NULL; | ||
326 | rtabp = &rtab->next) { | ||
325 | if (rtab == tab) { | 327 | if (rtab == tab) { |
326 | *rtabp = rtab->next; | 328 | *rtabp = rtab->next; |
327 | kfree(rtab); | 329 | kfree(rtab); |
@@ -396,6 +398,11 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt) | |||
396 | return stab; | 398 | return stab; |
397 | } | 399 | } |
398 | 400 | ||
401 | static void stab_kfree_rcu(struct rcu_head *head) | ||
402 | { | ||
403 | kfree(container_of(head, struct qdisc_size_table, rcu)); | ||
404 | } | ||
405 | |||
399 | void qdisc_put_stab(struct qdisc_size_table *tab) | 406 | void qdisc_put_stab(struct qdisc_size_table *tab) |
400 | { | 407 | { |
401 | if (!tab) | 408 | if (!tab) |
@@ -405,7 +412,7 @@ void qdisc_put_stab(struct qdisc_size_table *tab) | |||
405 | 412 | ||
406 | if (--tab->refcnt == 0) { | 413 | if (--tab->refcnt == 0) { |
407 | list_del(&tab->list); | 414 | list_del(&tab->list); |
408 | kfree(tab); | 415 | call_rcu_bh(&tab->rcu, stab_kfree_rcu); |
409 | } | 416 | } |
410 | 417 | ||
411 | spin_unlock(&qdisc_stab_lock); | 418 | spin_unlock(&qdisc_stab_lock); |
@@ -428,7 +435,7 @@ nla_put_failure: | |||
428 | return -1; | 435 | return -1; |
429 | } | 436 | } |
430 | 437 | ||
431 | void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab) | 438 | void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab) |
432 | { | 439 | { |
433 | int pkt_len, slot; | 440 | int pkt_len, slot; |
434 | 441 | ||
@@ -454,14 +461,13 @@ out: | |||
454 | pkt_len = 1; | 461 | pkt_len = 1; |
455 | qdisc_skb_cb(skb)->pkt_len = pkt_len; | 462 | qdisc_skb_cb(skb)->pkt_len = pkt_len; |
456 | } | 463 | } |
457 | EXPORT_SYMBOL(qdisc_calculate_pkt_len); | 464 | EXPORT_SYMBOL(__qdisc_calculate_pkt_len); |
458 | 465 | ||
459 | void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc) | 466 | void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc) |
460 | { | 467 | { |
461 | if (!(qdisc->flags & TCQ_F_WARN_NONWC)) { | 468 | if (!(qdisc->flags & TCQ_F_WARN_NONWC)) { |
462 | printk(KERN_WARNING | 469 | pr_warn("%s: %s qdisc %X: is non-work-conserving?\n", |
463 | "%s: %s qdisc %X: is non-work-conserving?\n", | 470 | txt, qdisc->ops->id, qdisc->handle >> 16); |
464 | txt, qdisc->ops->id, qdisc->handle >> 16); | ||
465 | qdisc->flags |= TCQ_F_WARN_NONWC; | 471 | qdisc->flags |= TCQ_F_WARN_NONWC; |
466 | } | 472 | } |
467 | } | 473 | } |
@@ -472,7 +478,7 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) | |||
472 | struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, | 478 | struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, |
473 | timer); | 479 | timer); |
474 | 480 | ||
475 | wd->qdisc->flags &= ~TCQ_F_THROTTLED; | 481 | qdisc_unthrottled(wd->qdisc); |
476 | __netif_schedule(qdisc_root(wd->qdisc)); | 482 | __netif_schedule(qdisc_root(wd->qdisc)); |
477 | 483 | ||
478 | return HRTIMER_NORESTART; | 484 | return HRTIMER_NORESTART; |
@@ -494,7 +500,7 @@ void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires) | |||
494 | &qdisc_root_sleeping(wd->qdisc)->state)) | 500 | &qdisc_root_sleeping(wd->qdisc)->state)) |
495 | return; | 501 | return; |
496 | 502 | ||
497 | wd->qdisc->flags |= TCQ_F_THROTTLED; | 503 | qdisc_throttled(wd->qdisc); |
498 | time = ktime_set(0, 0); | 504 | time = ktime_set(0, 0); |
499 | time = ktime_add_ns(time, PSCHED_TICKS2NS(expires)); | 505 | time = ktime_add_ns(time, PSCHED_TICKS2NS(expires)); |
500 | hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS); | 506 | hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS); |
@@ -504,7 +510,7 @@ EXPORT_SYMBOL(qdisc_watchdog_schedule); | |||
504 | void qdisc_watchdog_cancel(struct qdisc_watchdog *wd) | 510 | void qdisc_watchdog_cancel(struct qdisc_watchdog *wd) |
505 | { | 511 | { |
506 | hrtimer_cancel(&wd->timer); | 512 | hrtimer_cancel(&wd->timer); |
507 | wd->qdisc->flags &= ~TCQ_F_THROTTLED; | 513 | qdisc_unthrottled(wd->qdisc); |
508 | } | 514 | } |
509 | EXPORT_SYMBOL(qdisc_watchdog_cancel); | 515 | EXPORT_SYMBOL(qdisc_watchdog_cancel); |
510 | 516 | ||
@@ -625,7 +631,7 @@ static u32 qdisc_alloc_handle(struct net_device *dev) | |||
625 | autohandle = TC_H_MAKE(0x80000000U, 0); | 631 | autohandle = TC_H_MAKE(0x80000000U, 0); |
626 | } while (qdisc_lookup(dev, autohandle) && --i > 0); | 632 | } while (qdisc_lookup(dev, autohandle) && --i > 0); |
627 | 633 | ||
628 | return i>0 ? autohandle : 0; | 634 | return i > 0 ? autohandle : 0; |
629 | } | 635 | } |
630 | 636 | ||
631 | void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) | 637 | void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) |
@@ -834,7 +840,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, | |||
834 | err = PTR_ERR(stab); | 840 | err = PTR_ERR(stab); |
835 | goto err_out4; | 841 | goto err_out4; |
836 | } | 842 | } |
837 | sch->stab = stab; | 843 | rcu_assign_pointer(sch->stab, stab); |
838 | } | 844 | } |
839 | if (tca[TCA_RATE]) { | 845 | if (tca[TCA_RATE]) { |
840 | spinlock_t *root_lock; | 846 | spinlock_t *root_lock; |
@@ -874,7 +880,7 @@ err_out4: | |||
874 | * Any broken qdiscs that would require a ops->reset() here? | 880 | * Any broken qdiscs that would require a ops->reset() here? |
875 | * The qdisc was never in action so it shouldn't be necessary. | 881 | * The qdisc was never in action so it shouldn't be necessary. |
876 | */ | 882 | */ |
877 | qdisc_put_stab(sch->stab); | 883 | qdisc_put_stab(rtnl_dereference(sch->stab)); |
878 | if (ops->destroy) | 884 | if (ops->destroy) |
879 | ops->destroy(sch); | 885 | ops->destroy(sch); |
880 | goto err_out3; | 886 | goto err_out3; |
@@ -882,7 +888,7 @@ err_out4: | |||
882 | 888 | ||
883 | static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) | 889 | static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) |
884 | { | 890 | { |
885 | struct qdisc_size_table *stab = NULL; | 891 | struct qdisc_size_table *ostab, *stab = NULL; |
886 | int err = 0; | 892 | int err = 0; |
887 | 893 | ||
888 | if (tca[TCA_OPTIONS]) { | 894 | if (tca[TCA_OPTIONS]) { |
@@ -899,8 +905,9 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) | |||
899 | return PTR_ERR(stab); | 905 | return PTR_ERR(stab); |
900 | } | 906 | } |
901 | 907 | ||
902 | qdisc_put_stab(sch->stab); | 908 | ostab = rtnl_dereference(sch->stab); |
903 | sch->stab = stab; | 909 | rcu_assign_pointer(sch->stab, stab); |
910 | qdisc_put_stab(ostab); | ||
904 | 911 | ||
905 | if (tca[TCA_RATE]) { | 912 | if (tca[TCA_RATE]) { |
906 | /* NB: ignores errors from replace_estimator | 913 | /* NB: ignores errors from replace_estimator |
@@ -915,9 +922,8 @@ out: | |||
915 | return 0; | 922 | return 0; |
916 | } | 923 | } |
917 | 924 | ||
918 | struct check_loop_arg | 925 | struct check_loop_arg { |
919 | { | 926 | struct qdisc_walker w; |
920 | struct qdisc_walker w; | ||
921 | struct Qdisc *p; | 927 | struct Qdisc *p; |
922 | int depth; | 928 | int depth; |
923 | }; | 929 | }; |
@@ -970,7 +976,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) | |||
970 | struct Qdisc *p = NULL; | 976 | struct Qdisc *p = NULL; |
971 | int err; | 977 | int err; |
972 | 978 | ||
973 | if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) | 979 | dev = __dev_get_by_index(net, tcm->tcm_ifindex); |
980 | if (!dev) | ||
974 | return -ENODEV; | 981 | return -ENODEV; |
975 | 982 | ||
976 | err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); | 983 | err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); |
@@ -980,12 +987,12 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) | |||
980 | if (clid) { | 987 | if (clid) { |
981 | if (clid != TC_H_ROOT) { | 988 | if (clid != TC_H_ROOT) { |
982 | if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) { | 989 | if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) { |
983 | if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL) | 990 | p = qdisc_lookup(dev, TC_H_MAJ(clid)); |
991 | if (!p) | ||
984 | return -ENOENT; | 992 | return -ENOENT; |
985 | q = qdisc_leaf(p, clid); | 993 | q = qdisc_leaf(p, clid); |
986 | } else { /* ingress */ | 994 | } else if (dev_ingress_queue(dev)) { |
987 | if (dev_ingress_queue(dev)) | 995 | q = dev_ingress_queue(dev)->qdisc_sleeping; |
988 | q = dev_ingress_queue(dev)->qdisc_sleeping; | ||
989 | } | 996 | } |
990 | } else { | 997 | } else { |
991 | q = dev->qdisc; | 998 | q = dev->qdisc; |
@@ -996,7 +1003,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) | |||
996 | if (tcm->tcm_handle && q->handle != tcm->tcm_handle) | 1003 | if (tcm->tcm_handle && q->handle != tcm->tcm_handle) |
997 | return -EINVAL; | 1004 | return -EINVAL; |
998 | } else { | 1005 | } else { |
999 | if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL) | 1006 | q = qdisc_lookup(dev, tcm->tcm_handle); |
1007 | if (!q) | ||
1000 | return -ENOENT; | 1008 | return -ENOENT; |
1001 | } | 1009 | } |
1002 | 1010 | ||
@@ -1008,7 +1016,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) | |||
1008 | return -EINVAL; | 1016 | return -EINVAL; |
1009 | if (q->handle == 0) | 1017 | if (q->handle == 0) |
1010 | return -ENOENT; | 1018 | return -ENOENT; |
1011 | if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0) | 1019 | err = qdisc_graft(dev, p, skb, n, clid, NULL, q); |
1020 | if (err != 0) | ||
1012 | return err; | 1021 | return err; |
1013 | } else { | 1022 | } else { |
1014 | qdisc_notify(net, skb, n, clid, NULL, q); | 1023 | qdisc_notify(net, skb, n, clid, NULL, q); |
@@ -1017,7 +1026,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) | |||
1017 | } | 1026 | } |
1018 | 1027 | ||
1019 | /* | 1028 | /* |
1020 | Create/change qdisc. | 1029 | * Create/change qdisc. |
1021 | */ | 1030 | */ |
1022 | 1031 | ||
1023 | static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) | 1032 | static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) |
@@ -1036,7 +1045,8 @@ replay: | |||
1036 | clid = tcm->tcm_parent; | 1045 | clid = tcm->tcm_parent; |
1037 | q = p = NULL; | 1046 | q = p = NULL; |
1038 | 1047 | ||
1039 | if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) | 1048 | dev = __dev_get_by_index(net, tcm->tcm_ifindex); |
1049 | if (!dev) | ||
1040 | return -ENODEV; | 1050 | return -ENODEV; |
1041 | 1051 | ||
1042 | err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); | 1052 | err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); |
@@ -1046,12 +1056,12 @@ replay: | |||
1046 | if (clid) { | 1056 | if (clid) { |
1047 | if (clid != TC_H_ROOT) { | 1057 | if (clid != TC_H_ROOT) { |
1048 | if (clid != TC_H_INGRESS) { | 1058 | if (clid != TC_H_INGRESS) { |
1049 | if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL) | 1059 | p = qdisc_lookup(dev, TC_H_MAJ(clid)); |
1060 | if (!p) | ||
1050 | return -ENOENT; | 1061 | return -ENOENT; |
1051 | q = qdisc_leaf(p, clid); | 1062 | q = qdisc_leaf(p, clid); |
1052 | } else { /* ingress */ | 1063 | } else if (dev_ingress_queue_create(dev)) { |
1053 | if (dev_ingress_queue_create(dev)) | 1064 | q = dev_ingress_queue(dev)->qdisc_sleeping; |
1054 | q = dev_ingress_queue(dev)->qdisc_sleeping; | ||
1055 | } | 1065 | } |
1056 | } else { | 1066 | } else { |
1057 | q = dev->qdisc; | 1067 | q = dev->qdisc; |
@@ -1063,13 +1073,14 @@ replay: | |||
1063 | 1073 | ||
1064 | if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) { | 1074 | if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) { |
1065 | if (tcm->tcm_handle) { | 1075 | if (tcm->tcm_handle) { |
1066 | if (q && !(n->nlmsg_flags&NLM_F_REPLACE)) | 1076 | if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) |
1067 | return -EEXIST; | 1077 | return -EEXIST; |
1068 | if (TC_H_MIN(tcm->tcm_handle)) | 1078 | if (TC_H_MIN(tcm->tcm_handle)) |
1069 | return -EINVAL; | 1079 | return -EINVAL; |
1070 | if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL) | 1080 | q = qdisc_lookup(dev, tcm->tcm_handle); |
1081 | if (!q) | ||
1071 | goto create_n_graft; | 1082 | goto create_n_graft; |
1072 | if (n->nlmsg_flags&NLM_F_EXCL) | 1083 | if (n->nlmsg_flags & NLM_F_EXCL) |
1073 | return -EEXIST; | 1084 | return -EEXIST; |
1074 | if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) | 1085 | if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) |
1075 | return -EINVAL; | 1086 | return -EINVAL; |
@@ -1079,7 +1090,7 @@ replay: | |||
1079 | atomic_inc(&q->refcnt); | 1090 | atomic_inc(&q->refcnt); |
1080 | goto graft; | 1091 | goto graft; |
1081 | } else { | 1092 | } else { |
1082 | if (q == NULL) | 1093 | if (!q) |
1083 | goto create_n_graft; | 1094 | goto create_n_graft; |
1084 | 1095 | ||
1085 | /* This magic test requires explanation. | 1096 | /* This magic test requires explanation. |
@@ -1101,9 +1112,9 @@ replay: | |||
1101 | * For now we select create/graft, if | 1112 | * For now we select create/graft, if |
1102 | * user gave KIND, which does not match existing. | 1113 | * user gave KIND, which does not match existing. |
1103 | */ | 1114 | */ |
1104 | if ((n->nlmsg_flags&NLM_F_CREATE) && | 1115 | if ((n->nlmsg_flags & NLM_F_CREATE) && |
1105 | (n->nlmsg_flags&NLM_F_REPLACE) && | 1116 | (n->nlmsg_flags & NLM_F_REPLACE) && |
1106 | ((n->nlmsg_flags&NLM_F_EXCL) || | 1117 | ((n->nlmsg_flags & NLM_F_EXCL) || |
1107 | (tca[TCA_KIND] && | 1118 | (tca[TCA_KIND] && |
1108 | nla_strcmp(tca[TCA_KIND], q->ops->id)))) | 1119 | nla_strcmp(tca[TCA_KIND], q->ops->id)))) |
1109 | goto create_n_graft; | 1120 | goto create_n_graft; |
@@ -1118,7 +1129,7 @@ replay: | |||
1118 | /* Change qdisc parameters */ | 1129 | /* Change qdisc parameters */ |
1119 | if (q == NULL) | 1130 | if (q == NULL) |
1120 | return -ENOENT; | 1131 | return -ENOENT; |
1121 | if (n->nlmsg_flags&NLM_F_EXCL) | 1132 | if (n->nlmsg_flags & NLM_F_EXCL) |
1122 | return -EEXIST; | 1133 | return -EEXIST; |
1123 | if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) | 1134 | if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) |
1124 | return -EINVAL; | 1135 | return -EINVAL; |
@@ -1128,7 +1139,7 @@ replay: | |||
1128 | return err; | 1139 | return err; |
1129 | 1140 | ||
1130 | create_n_graft: | 1141 | create_n_graft: |
1131 | if (!(n->nlmsg_flags&NLM_F_CREATE)) | 1142 | if (!(n->nlmsg_flags & NLM_F_CREATE)) |
1132 | return -ENOENT; | 1143 | return -ENOENT; |
1133 | if (clid == TC_H_INGRESS) { | 1144 | if (clid == TC_H_INGRESS) { |
1134 | if (dev_ingress_queue(dev)) | 1145 | if (dev_ingress_queue(dev)) |
@@ -1175,6 +1186,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, | |||
1175 | struct nlmsghdr *nlh; | 1186 | struct nlmsghdr *nlh; |
1176 | unsigned char *b = skb_tail_pointer(skb); | 1187 | unsigned char *b = skb_tail_pointer(skb); |
1177 | struct gnet_dump d; | 1188 | struct gnet_dump d; |
1189 | struct qdisc_size_table *stab; | ||
1178 | 1190 | ||
1179 | nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags); | 1191 | nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags); |
1180 | tcm = NLMSG_DATA(nlh); | 1192 | tcm = NLMSG_DATA(nlh); |
@@ -1190,7 +1202,8 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, | |||
1190 | goto nla_put_failure; | 1202 | goto nla_put_failure; |
1191 | q->qstats.qlen = q->q.qlen; | 1203 | q->qstats.qlen = q->q.qlen; |
1192 | 1204 | ||
1193 | if (q->stab && qdisc_dump_stab(skb, q->stab) < 0) | 1205 | stab = rtnl_dereference(q->stab); |
1206 | if (stab && qdisc_dump_stab(skb, stab) < 0) | ||
1194 | goto nla_put_failure; | 1207 | goto nla_put_failure; |
1195 | 1208 | ||
1196 | if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, | 1209 | if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, |
@@ -1234,16 +1247,19 @@ static int qdisc_notify(struct net *net, struct sk_buff *oskb, | |||
1234 | return -ENOBUFS; | 1247 | return -ENOBUFS; |
1235 | 1248 | ||
1236 | if (old && !tc_qdisc_dump_ignore(old)) { | 1249 | if (old && !tc_qdisc_dump_ignore(old)) { |
1237 | if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0) | 1250 | if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, |
1251 | 0, RTM_DELQDISC) < 0) | ||
1238 | goto err_out; | 1252 | goto err_out; |
1239 | } | 1253 | } |
1240 | if (new && !tc_qdisc_dump_ignore(new)) { | 1254 | if (new && !tc_qdisc_dump_ignore(new)) { |
1241 | if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0) | 1255 | if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, |
1256 | old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0) | ||
1242 | goto err_out; | 1257 | goto err_out; |
1243 | } | 1258 | } |
1244 | 1259 | ||
1245 | if (skb->len) | 1260 | if (skb->len) |
1246 | return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); | 1261 | return rtnetlink_send(skb, net, pid, RTNLGRP_TC, |
1262 | n->nlmsg_flags & NLM_F_ECHO); | ||
1247 | 1263 | ||
1248 | err_out: | 1264 | err_out: |
1249 | kfree_skb(skb); | 1265 | kfree_skb(skb); |
@@ -1275,7 +1291,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, | |||
1275 | q_idx++; | 1291 | q_idx++; |
1276 | continue; | 1292 | continue; |
1277 | } | 1293 | } |
1278 | if (!tc_qdisc_dump_ignore(q) && | 1294 | if (!tc_qdisc_dump_ignore(q) && |
1279 | tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, | 1295 | tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, |
1280 | cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) | 1296 | cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) |
1281 | goto done; | 1297 | goto done; |
@@ -1356,7 +1372,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) | |||
1356 | u32 qid = TC_H_MAJ(clid); | 1372 | u32 qid = TC_H_MAJ(clid); |
1357 | int err; | 1373 | int err; |
1358 | 1374 | ||
1359 | if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) | 1375 | dev = __dev_get_by_index(net, tcm->tcm_ifindex); |
1376 | if (!dev) | ||
1360 | return -ENODEV; | 1377 | return -ENODEV; |
1361 | 1378 | ||
1362 | err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); | 1379 | err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); |
@@ -1391,9 +1408,9 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) | |||
1391 | qid = dev->qdisc->handle; | 1408 | qid = dev->qdisc->handle; |
1392 | 1409 | ||
1393 | /* Now qid is genuine qdisc handle consistent | 1410 | /* Now qid is genuine qdisc handle consistent |
1394 | both with parent and child. | 1411 | * both with parent and child. |
1395 | 1412 | * | |
1396 | TC_H_MAJ(pid) still may be unspecified, complete it now. | 1413 | * TC_H_MAJ(pid) still may be unspecified, complete it now. |
1397 | */ | 1414 | */ |
1398 | if (pid) | 1415 | if (pid) |
1399 | pid = TC_H_MAKE(qid, pid); | 1416 | pid = TC_H_MAKE(qid, pid); |
@@ -1403,7 +1420,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) | |||
1403 | } | 1420 | } |
1404 | 1421 | ||
1405 | /* OK. Locate qdisc */ | 1422 | /* OK. Locate qdisc */ |
1406 | if ((q = qdisc_lookup(dev, qid)) == NULL) | 1423 | q = qdisc_lookup(dev, qid); |
1424 | if (!q) | ||
1407 | return -ENOENT; | 1425 | return -ENOENT; |
1408 | 1426 | ||
1409 | /* An check that it supports classes */ | 1427 | /* An check that it supports classes */ |
@@ -1423,13 +1441,14 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) | |||
1423 | 1441 | ||
1424 | if (cl == 0) { | 1442 | if (cl == 0) { |
1425 | err = -ENOENT; | 1443 | err = -ENOENT; |
1426 | if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE)) | 1444 | if (n->nlmsg_type != RTM_NEWTCLASS || |
1445 | !(n->nlmsg_flags & NLM_F_CREATE)) | ||
1427 | goto out; | 1446 | goto out; |
1428 | } else { | 1447 | } else { |
1429 | switch (n->nlmsg_type) { | 1448 | switch (n->nlmsg_type) { |
1430 | case RTM_NEWTCLASS: | 1449 | case RTM_NEWTCLASS: |
1431 | err = -EEXIST; | 1450 | err = -EEXIST; |
1432 | if (n->nlmsg_flags&NLM_F_EXCL) | 1451 | if (n->nlmsg_flags & NLM_F_EXCL) |
1433 | goto out; | 1452 | goto out; |
1434 | break; | 1453 | break; |
1435 | case RTM_DELTCLASS: | 1454 | case RTM_DELTCLASS: |
@@ -1521,14 +1540,14 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb, | |||
1521 | return -EINVAL; | 1540 | return -EINVAL; |
1522 | } | 1541 | } |
1523 | 1542 | ||
1524 | return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); | 1543 | return rtnetlink_send(skb, net, pid, RTNLGRP_TC, |
1544 | n->nlmsg_flags & NLM_F_ECHO); | ||
1525 | } | 1545 | } |
1526 | 1546 | ||
1527 | struct qdisc_dump_args | 1547 | struct qdisc_dump_args { |
1528 | { | 1548 | struct qdisc_walker w; |
1529 | struct qdisc_walker w; | 1549 | struct sk_buff *skb; |
1530 | struct sk_buff *skb; | 1550 | struct netlink_callback *cb; |
1531 | struct netlink_callback *cb; | ||
1532 | }; | 1551 | }; |
1533 | 1552 | ||
1534 | static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg) | 1553 | static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg) |
@@ -1590,7 +1609,7 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, | |||
1590 | 1609 | ||
1591 | static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) | 1610 | static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) |
1592 | { | 1611 | { |
1593 | struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh); | 1612 | struct tcmsg *tcm = (struct tcmsg *)NLMSG_DATA(cb->nlh); |
1594 | struct net *net = sock_net(skb->sk); | 1613 | struct net *net = sock_net(skb->sk); |
1595 | struct netdev_queue *dev_queue; | 1614 | struct netdev_queue *dev_queue; |
1596 | struct net_device *dev; | 1615 | struct net_device *dev; |
@@ -1598,7 +1617,8 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) | |||
1598 | 1617 | ||
1599 | if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) | 1618 | if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) |
1600 | return 0; | 1619 | return 0; |
1601 | if ((dev = dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) | 1620 | dev = dev_get_by_index(net, tcm->tcm_ifindex); |
1621 | if (!dev) | ||
1602 | return 0; | 1622 | return 0; |
1603 | 1623 | ||
1604 | s_t = cb->args[0]; | 1624 | s_t = cb->args[0]; |
@@ -1621,19 +1641,22 @@ done: | |||
1621 | } | 1641 | } |
1622 | 1642 | ||
1623 | /* Main classifier routine: scans classifier chain attached | 1643 | /* Main classifier routine: scans classifier chain attached |
1624 | to this qdisc, (optionally) tests for protocol and asks | 1644 | * to this qdisc, (optionally) tests for protocol and asks |
1625 | specific classifiers. | 1645 | * specific classifiers. |
1626 | */ | 1646 | */ |
1627 | int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp, | 1647 | int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp, |
1628 | struct tcf_result *res) | 1648 | struct tcf_result *res) |
1629 | { | 1649 | { |
1630 | __be16 protocol = skb->protocol; | 1650 | __be16 protocol = skb->protocol; |
1631 | int err = 0; | 1651 | int err; |
1632 | 1652 | ||
1633 | for (; tp; tp = tp->next) { | 1653 | for (; tp; tp = tp->next) { |
1634 | if ((tp->protocol == protocol || | 1654 | if (tp->protocol != protocol && |
1635 | tp->protocol == htons(ETH_P_ALL)) && | 1655 | tp->protocol != htons(ETH_P_ALL)) |
1636 | (err = tp->classify(skb, tp, res)) >= 0) { | 1656 | continue; |
1657 | err = tp->classify(skb, tp, res); | ||
1658 | |||
1659 | if (err >= 0) { | ||
1637 | #ifdef CONFIG_NET_CLS_ACT | 1660 | #ifdef CONFIG_NET_CLS_ACT |
1638 | if (err != TC_ACT_RECLASSIFY && skb->tc_verd) | 1661 | if (err != TC_ACT_RECLASSIFY && skb->tc_verd) |
1639 | skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0); | 1662 | skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0); |
@@ -1664,11 +1687,11 @@ reclassify: | |||
1664 | 1687 | ||
1665 | if (verd++ >= MAX_REC_LOOP) { | 1688 | if (verd++ >= MAX_REC_LOOP) { |
1666 | if (net_ratelimit()) | 1689 | if (net_ratelimit()) |
1667 | printk(KERN_NOTICE | 1690 | pr_notice("%s: packet reclassify loop" |
1668 | "%s: packet reclassify loop" | ||
1669 | " rule prio %u protocol %02x\n", | 1691 | " rule prio %u protocol %02x\n", |
1670 | tp->q->ops->id, | 1692 | tp->q->ops->id, |
1671 | tp->prio & 0xffff, ntohs(tp->protocol)); | 1693 | tp->prio & 0xffff, |
1694 | ntohs(tp->protocol)); | ||
1672 | return TC_ACT_SHOT; | 1695 | return TC_ACT_SHOT; |
1673 | } | 1696 | } |
1674 | skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd); | 1697 | skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd); |
@@ -1761,7 +1784,7 @@ static int __init pktsched_init(void) | |||
1761 | 1784 | ||
1762 | err = register_pernet_subsys(&psched_net_ops); | 1785 | err = register_pernet_subsys(&psched_net_ops); |
1763 | if (err) { | 1786 | if (err) { |
1764 | printk(KERN_ERR "pktsched_init: " | 1787 | pr_err("pktsched_init: " |
1765 | "cannot initialize per netns operations\n"); | 1788 | "cannot initialize per netns operations\n"); |
1766 | return err; | 1789 | return err; |
1767 | } | 1790 | } |
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 943d733409d0..3f08158b8688 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c | |||
@@ -319,7 +319,7 @@ static int atm_tc_delete(struct Qdisc *sch, unsigned long arg) | |||
319 | * creation), and one for the reference held when calling delete. | 319 | * creation), and one for the reference held when calling delete. |
320 | */ | 320 | */ |
321 | if (flow->ref < 2) { | 321 | if (flow->ref < 2) { |
322 | printk(KERN_ERR "atm_tc_delete: flow->ref == %d\n", flow->ref); | 322 | pr_err("atm_tc_delete: flow->ref == %d\n", flow->ref); |
323 | return -EINVAL; | 323 | return -EINVAL; |
324 | } | 324 | } |
325 | if (flow->ref > 2) | 325 | if (flow->ref > 2) |
@@ -384,12 +384,12 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch) | |||
384 | } | 384 | } |
385 | } | 385 | } |
386 | flow = NULL; | 386 | flow = NULL; |
387 | done: | 387 | done: |
388 | ; | 388 | ; |
389 | } | 389 | } |
390 | if (!flow) | 390 | if (!flow) { |
391 | flow = &p->link; | 391 | flow = &p->link; |
392 | else { | 392 | } else { |
393 | if (flow->vcc) | 393 | if (flow->vcc) |
394 | ATM_SKB(skb)->atm_options = flow->vcc->atm_options; | 394 | ATM_SKB(skb)->atm_options = flow->vcc->atm_options; |
395 | /*@@@ looks good ... but it's not supposed to work :-) */ | 395 | /*@@@ looks good ... but it's not supposed to work :-) */ |
@@ -576,8 +576,7 @@ static void atm_tc_destroy(struct Qdisc *sch) | |||
576 | 576 | ||
577 | list_for_each_entry_safe(flow, tmp, &p->flows, list) { | 577 | list_for_each_entry_safe(flow, tmp, &p->flows, list) { |
578 | if (flow->ref > 1) | 578 | if (flow->ref > 1) |
579 | printk(KERN_ERR "atm_destroy: %p->ref = %d\n", flow, | 579 | pr_err("atm_destroy: %p->ref = %d\n", flow, flow->ref); |
580 | flow->ref); | ||
581 | atm_tc_put(sch, (unsigned long)flow); | 580 | atm_tc_put(sch, (unsigned long)flow); |
582 | } | 581 | } |
583 | tasklet_kill(&p->task); | 582 | tasklet_kill(&p->task); |
@@ -616,9 +615,8 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl, | |||
616 | } | 615 | } |
617 | if (flow->excess) | 616 | if (flow->excess) |
618 | NLA_PUT_U32(skb, TCA_ATM_EXCESS, flow->classid); | 617 | NLA_PUT_U32(skb, TCA_ATM_EXCESS, flow->classid); |
619 | else { | 618 | else |
620 | NLA_PUT_U32(skb, TCA_ATM_EXCESS, 0); | 619 | NLA_PUT_U32(skb, TCA_ATM_EXCESS, 0); |
621 | } | ||
622 | 620 | ||
623 | nla_nest_end(skb, nest); | 621 | nla_nest_end(skb, nest); |
624 | return skb->len; | 622 | return skb->len; |
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 5f63ec58942c..24d94c097b35 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c | |||
@@ -72,8 +72,7 @@ | |||
72 | struct cbq_sched_data; | 72 | struct cbq_sched_data; |
73 | 73 | ||
74 | 74 | ||
75 | struct cbq_class | 75 | struct cbq_class { |
76 | { | ||
77 | struct Qdisc_class_common common; | 76 | struct Qdisc_class_common common; |
78 | struct cbq_class *next_alive; /* next class with backlog in this priority band */ | 77 | struct cbq_class *next_alive; /* next class with backlog in this priority band */ |
79 | 78 | ||
@@ -139,19 +138,18 @@ struct cbq_class | |||
139 | int refcnt; | 138 | int refcnt; |
140 | int filters; | 139 | int filters; |
141 | 140 | ||
142 | struct cbq_class *defaults[TC_PRIO_MAX+1]; | 141 | struct cbq_class *defaults[TC_PRIO_MAX + 1]; |
143 | }; | 142 | }; |
144 | 143 | ||
145 | struct cbq_sched_data | 144 | struct cbq_sched_data { |
146 | { | ||
147 | struct Qdisc_class_hash clhash; /* Hash table of all classes */ | 145 | struct Qdisc_class_hash clhash; /* Hash table of all classes */ |
148 | int nclasses[TC_CBQ_MAXPRIO+1]; | 146 | int nclasses[TC_CBQ_MAXPRIO + 1]; |
149 | unsigned quanta[TC_CBQ_MAXPRIO+1]; | 147 | unsigned int quanta[TC_CBQ_MAXPRIO + 1]; |
150 | 148 | ||
151 | struct cbq_class link; | 149 | struct cbq_class link; |
152 | 150 | ||
153 | unsigned activemask; | 151 | unsigned int activemask; |
154 | struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes | 152 | struct cbq_class *active[TC_CBQ_MAXPRIO + 1]; /* List of all classes |
155 | with backlog */ | 153 | with backlog */ |
156 | 154 | ||
157 | #ifdef CONFIG_NET_CLS_ACT | 155 | #ifdef CONFIG_NET_CLS_ACT |
@@ -162,7 +160,7 @@ struct cbq_sched_data | |||
162 | int tx_len; | 160 | int tx_len; |
163 | psched_time_t now; /* Cached timestamp */ | 161 | psched_time_t now; /* Cached timestamp */ |
164 | psched_time_t now_rt; /* Cached real time */ | 162 | psched_time_t now_rt; /* Cached real time */ |
165 | unsigned pmask; | 163 | unsigned int pmask; |
166 | 164 | ||
167 | struct hrtimer delay_timer; | 165 | struct hrtimer delay_timer; |
168 | struct qdisc_watchdog watchdog; /* Watchdog timer, | 166 | struct qdisc_watchdog watchdog; /* Watchdog timer, |
@@ -175,9 +173,9 @@ struct cbq_sched_data | |||
175 | }; | 173 | }; |
176 | 174 | ||
177 | 175 | ||
178 | #define L2T(cl,len) qdisc_l2t((cl)->R_tab,len) | 176 | #define L2T(cl, len) qdisc_l2t((cl)->R_tab, len) |
179 | 177 | ||
180 | static __inline__ struct cbq_class * | 178 | static inline struct cbq_class * |
181 | cbq_class_lookup(struct cbq_sched_data *q, u32 classid) | 179 | cbq_class_lookup(struct cbq_sched_data *q, u32 classid) |
182 | { | 180 | { |
183 | struct Qdisc_class_common *clc; | 181 | struct Qdisc_class_common *clc; |
@@ -193,25 +191,27 @@ cbq_class_lookup(struct cbq_sched_data *q, u32 classid) | |||
193 | static struct cbq_class * | 191 | static struct cbq_class * |
194 | cbq_reclassify(struct sk_buff *skb, struct cbq_class *this) | 192 | cbq_reclassify(struct sk_buff *skb, struct cbq_class *this) |
195 | { | 193 | { |
196 | struct cbq_class *cl, *new; | 194 | struct cbq_class *cl; |
197 | 195 | ||
198 | for (cl = this->tparent; cl; cl = cl->tparent) | 196 | for (cl = this->tparent; cl; cl = cl->tparent) { |
199 | if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this) | 197 | struct cbq_class *new = cl->defaults[TC_PRIO_BESTEFFORT]; |
200 | return new; | ||
201 | 198 | ||
199 | if (new != NULL && new != this) | ||
200 | return new; | ||
201 | } | ||
202 | return NULL; | 202 | return NULL; |
203 | } | 203 | } |
204 | 204 | ||
205 | #endif | 205 | #endif |
206 | 206 | ||
207 | /* Classify packet. The procedure is pretty complicated, but | 207 | /* Classify packet. The procedure is pretty complicated, but |
208 | it allows us to combine link sharing and priority scheduling | 208 | * it allows us to combine link sharing and priority scheduling |
209 | transparently. | 209 | * transparently. |
210 | 210 | * | |
211 | Namely, you can put link sharing rules (f.e. route based) at root of CBQ, | 211 | * Namely, you can put link sharing rules (f.e. route based) at root of CBQ, |
212 | so that it resolves to split nodes. Then packets are classified | 212 | * so that it resolves to split nodes. Then packets are classified |
213 | by logical priority, or a more specific classifier may be attached | 213 | * by logical priority, or a more specific classifier may be attached |
214 | to the split node. | 214 | * to the split node. |
215 | */ | 215 | */ |
216 | 216 | ||
217 | static struct cbq_class * | 217 | static struct cbq_class * |
@@ -227,7 +227,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) | |||
227 | /* | 227 | /* |
228 | * Step 1. If skb->priority points to one of our classes, use it. | 228 | * Step 1. If skb->priority points to one of our classes, use it. |
229 | */ | 229 | */ |
230 | if (TC_H_MAJ(prio^sch->handle) == 0 && | 230 | if (TC_H_MAJ(prio ^ sch->handle) == 0 && |
231 | (cl = cbq_class_lookup(q, prio)) != NULL) | 231 | (cl = cbq_class_lookup(q, prio)) != NULL) |
232 | return cl; | 232 | return cl; |
233 | 233 | ||
@@ -243,10 +243,11 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) | |||
243 | (result = tc_classify_compat(skb, head->filter_list, &res)) < 0) | 243 | (result = tc_classify_compat(skb, head->filter_list, &res)) < 0) |
244 | goto fallback; | 244 | goto fallback; |
245 | 245 | ||
246 | if ((cl = (void*)res.class) == NULL) { | 246 | cl = (void *)res.class; |
247 | if (!cl) { | ||
247 | if (TC_H_MAJ(res.classid)) | 248 | if (TC_H_MAJ(res.classid)) |
248 | cl = cbq_class_lookup(q, res.classid); | 249 | cl = cbq_class_lookup(q, res.classid); |
249 | else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL) | 250 | else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL) |
250 | cl = defmap[TC_PRIO_BESTEFFORT]; | 251 | cl = defmap[TC_PRIO_BESTEFFORT]; |
251 | 252 | ||
252 | if (cl == NULL || cl->level >= head->level) | 253 | if (cl == NULL || cl->level >= head->level) |
@@ -282,7 +283,7 @@ fallback: | |||
282 | * Step 4. No success... | 283 | * Step 4. No success... |
283 | */ | 284 | */ |
284 | if (TC_H_MAJ(prio) == 0 && | 285 | if (TC_H_MAJ(prio) == 0 && |
285 | !(cl = head->defaults[prio&TC_PRIO_MAX]) && | 286 | !(cl = head->defaults[prio & TC_PRIO_MAX]) && |
286 | !(cl = head->defaults[TC_PRIO_BESTEFFORT])) | 287 | !(cl = head->defaults[TC_PRIO_BESTEFFORT])) |
287 | return head; | 288 | return head; |
288 | 289 | ||
@@ -290,12 +291,12 @@ fallback: | |||
290 | } | 291 | } |
291 | 292 | ||
292 | /* | 293 | /* |
293 | A packet has just been enqueued on the empty class. | 294 | * A packet has just been enqueued on the empty class. |
294 | cbq_activate_class adds it to the tail of active class list | 295 | * cbq_activate_class adds it to the tail of active class list |
295 | of its priority band. | 296 | * of its priority band. |
296 | */ | 297 | */ |
297 | 298 | ||
298 | static __inline__ void cbq_activate_class(struct cbq_class *cl) | 299 | static inline void cbq_activate_class(struct cbq_class *cl) |
299 | { | 300 | { |
300 | struct cbq_sched_data *q = qdisc_priv(cl->qdisc); | 301 | struct cbq_sched_data *q = qdisc_priv(cl->qdisc); |
301 | int prio = cl->cpriority; | 302 | int prio = cl->cpriority; |
@@ -314,9 +315,9 @@ static __inline__ void cbq_activate_class(struct cbq_class *cl) | |||
314 | } | 315 | } |
315 | 316 | ||
316 | /* | 317 | /* |
317 | Unlink class from active chain. | 318 | * Unlink class from active chain. |
318 | Note that this same procedure is done directly in cbq_dequeue* | 319 | * Note that this same procedure is done directly in cbq_dequeue* |
319 | during round-robin procedure. | 320 | * during round-robin procedure. |
320 | */ | 321 | */ |
321 | 322 | ||
322 | static void cbq_deactivate_class(struct cbq_class *this) | 323 | static void cbq_deactivate_class(struct cbq_class *this) |
@@ -350,7 +351,7 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl) | |||
350 | { | 351 | { |
351 | int toplevel = q->toplevel; | 352 | int toplevel = q->toplevel; |
352 | 353 | ||
353 | if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) { | 354 | if (toplevel > cl->level && !(qdisc_is_throttled(cl->q))) { |
354 | psched_time_t now; | 355 | psched_time_t now; |
355 | psched_tdiff_t incr; | 356 | psched_tdiff_t incr; |
356 | 357 | ||
@@ -363,7 +364,7 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl) | |||
363 | q->toplevel = cl->level; | 364 | q->toplevel = cl->level; |
364 | return; | 365 | return; |
365 | } | 366 | } |
366 | } while ((cl=cl->borrow) != NULL && toplevel > cl->level); | 367 | } while ((cl = cl->borrow) != NULL && toplevel > cl->level); |
367 | } | 368 | } |
368 | } | 369 | } |
369 | 370 | ||
@@ -417,11 +418,11 @@ static void cbq_ovl_classic(struct cbq_class *cl) | |||
417 | delay += cl->offtime; | 418 | delay += cl->offtime; |
418 | 419 | ||
419 | /* | 420 | /* |
420 | Class goes to sleep, so that it will have no | 421 | * Class goes to sleep, so that it will have no |
421 | chance to work avgidle. Let's forgive it 8) | 422 | * chance to work avgidle. Let's forgive it 8) |
422 | 423 | * | |
423 | BTW cbq-2.0 has a crap in this | 424 | * BTW cbq-2.0 has a crap in this |
424 | place, apparently they forgot to shift it by cl->ewma_log. | 425 | * place, apparently they forgot to shift it by cl->ewma_log. |
425 | */ | 426 | */ |
426 | if (cl->avgidle < 0) | 427 | if (cl->avgidle < 0) |
427 | delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log); | 428 | delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log); |
@@ -438,8 +439,8 @@ static void cbq_ovl_classic(struct cbq_class *cl) | |||
438 | q->wd_expires = delay; | 439 | q->wd_expires = delay; |
439 | 440 | ||
440 | /* Dirty work! We must schedule wakeups based on | 441 | /* Dirty work! We must schedule wakeups based on |
441 | real available rate, rather than leaf rate, | 442 | * real available rate, rather than leaf rate, |
442 | which may be tiny (even zero). | 443 | * which may be tiny (even zero). |
443 | */ | 444 | */ |
444 | if (q->toplevel == TC_CBQ_MAXLEVEL) { | 445 | if (q->toplevel == TC_CBQ_MAXLEVEL) { |
445 | struct cbq_class *b; | 446 | struct cbq_class *b; |
@@ -459,7 +460,7 @@ static void cbq_ovl_classic(struct cbq_class *cl) | |||
459 | } | 460 | } |
460 | 461 | ||
461 | /* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when | 462 | /* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when |
462 | they go overlimit | 463 | * they go overlimit |
463 | */ | 464 | */ |
464 | 465 | ||
465 | static void cbq_ovl_rclassic(struct cbq_class *cl) | 466 | static void cbq_ovl_rclassic(struct cbq_class *cl) |
@@ -594,7 +595,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer) | |||
594 | struct Qdisc *sch = q->watchdog.qdisc; | 595 | struct Qdisc *sch = q->watchdog.qdisc; |
595 | psched_time_t now; | 596 | psched_time_t now; |
596 | psched_tdiff_t delay = 0; | 597 | psched_tdiff_t delay = 0; |
597 | unsigned pmask; | 598 | unsigned int pmask; |
598 | 599 | ||
599 | now = psched_get_time(); | 600 | now = psched_get_time(); |
600 | 601 | ||
@@ -623,7 +624,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer) | |||
623 | hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS); | 624 | hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS); |
624 | } | 625 | } |
625 | 626 | ||
626 | sch->flags &= ~TCQ_F_THROTTLED; | 627 | qdisc_unthrottled(sch); |
627 | __netif_schedule(qdisc_root(sch)); | 628 | __netif_schedule(qdisc_root(sch)); |
628 | return HRTIMER_NORESTART; | 629 | return HRTIMER_NORESTART; |
629 | } | 630 | } |
@@ -663,15 +664,15 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child) | |||
663 | #endif | 664 | #endif |
664 | 665 | ||
665 | /* | 666 | /* |
666 | It is mission critical procedure. | 667 | * It is mission critical procedure. |
667 | 668 | * | |
668 | We "regenerate" toplevel cutoff, if transmitting class | 669 | * We "regenerate" toplevel cutoff, if transmitting class |
669 | has backlog and it is not regulated. It is not part of | 670 | * has backlog and it is not regulated. It is not part of |
670 | original CBQ description, but looks more reasonable. | 671 | * original CBQ description, but looks more reasonable. |
671 | Probably, it is wrong. This question needs further investigation. | 672 | * Probably, it is wrong. This question needs further investigation. |
672 | */ | 673 | */ |
673 | 674 | ||
674 | static __inline__ void | 675 | static inline void |
675 | cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl, | 676 | cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl, |
676 | struct cbq_class *borrowed) | 677 | struct cbq_class *borrowed) |
677 | { | 678 | { |
@@ -682,7 +683,7 @@ cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl, | |||
682 | q->toplevel = borrowed->level; | 683 | q->toplevel = borrowed->level; |
683 | return; | 684 | return; |
684 | } | 685 | } |
685 | } while ((borrowed=borrowed->borrow) != NULL); | 686 | } while ((borrowed = borrowed->borrow) != NULL); |
686 | } | 687 | } |
687 | #if 0 | 688 | #if 0 |
688 | /* It is not necessary now. Uncommenting it | 689 | /* It is not necessary now. Uncommenting it |
@@ -710,10 +711,10 @@ cbq_update(struct cbq_sched_data *q) | |||
710 | cl->bstats.bytes += len; | 711 | cl->bstats.bytes += len; |
711 | 712 | ||
712 | /* | 713 | /* |
713 | (now - last) is total time between packet right edges. | 714 | * (now - last) is total time between packet right edges. |
714 | (last_pktlen/rate) is "virtual" busy time, so that | 715 | * (last_pktlen/rate) is "virtual" busy time, so that |
715 | 716 | * | |
716 | idle = (now - last) - last_pktlen/rate | 717 | * idle = (now - last) - last_pktlen/rate |
717 | */ | 718 | */ |
718 | 719 | ||
719 | idle = q->now - cl->last; | 720 | idle = q->now - cl->last; |
@@ -723,9 +724,9 @@ cbq_update(struct cbq_sched_data *q) | |||
723 | idle -= L2T(cl, len); | 724 | idle -= L2T(cl, len); |
724 | 725 | ||
725 | /* true_avgidle := (1-W)*true_avgidle + W*idle, | 726 | /* true_avgidle := (1-W)*true_avgidle + W*idle, |
726 | where W=2^{-ewma_log}. But cl->avgidle is scaled: | 727 | * where W=2^{-ewma_log}. But cl->avgidle is scaled: |
727 | cl->avgidle == true_avgidle/W, | 728 | * cl->avgidle == true_avgidle/W, |
728 | hence: | 729 | * hence: |
729 | */ | 730 | */ |
730 | avgidle += idle - (avgidle>>cl->ewma_log); | 731 | avgidle += idle - (avgidle>>cl->ewma_log); |
731 | } | 732 | } |
@@ -739,22 +740,22 @@ cbq_update(struct cbq_sched_data *q) | |||
739 | cl->avgidle = avgidle; | 740 | cl->avgidle = avgidle; |
740 | 741 | ||
741 | /* Calculate expected time, when this class | 742 | /* Calculate expected time, when this class |
742 | will be allowed to send. | 743 | * will be allowed to send. |
743 | It will occur, when: | 744 | * It will occur, when: |
744 | (1-W)*true_avgidle + W*delay = 0, i.e. | 745 | * (1-W)*true_avgidle + W*delay = 0, i.e. |
745 | idle = (1/W - 1)*(-true_avgidle) | 746 | * idle = (1/W - 1)*(-true_avgidle) |
746 | or | 747 | * or |
747 | idle = (1 - W)*(-cl->avgidle); | 748 | * idle = (1 - W)*(-cl->avgidle); |
748 | */ | 749 | */ |
749 | idle = (-avgidle) - ((-avgidle) >> cl->ewma_log); | 750 | idle = (-avgidle) - ((-avgidle) >> cl->ewma_log); |
750 | 751 | ||
751 | /* | 752 | /* |
752 | That is not all. | 753 | * That is not all. |
753 | To maintain the rate allocated to the class, | 754 | * To maintain the rate allocated to the class, |
754 | we add to undertime virtual clock, | 755 | * we add to undertime virtual clock, |
755 | necessary to complete transmitted packet. | 756 | * necessary to complete transmitted packet. |
756 | (len/phys_bandwidth has been already passed | 757 | * (len/phys_bandwidth has been already passed |
757 | to the moment of cbq_update) | 758 | * to the moment of cbq_update) |
758 | */ | 759 | */ |
759 | 760 | ||
760 | idle -= L2T(&q->link, len); | 761 | idle -= L2T(&q->link, len); |
@@ -776,7 +777,7 @@ cbq_update(struct cbq_sched_data *q) | |||
776 | cbq_update_toplevel(q, this, q->tx_borrowed); | 777 | cbq_update_toplevel(q, this, q->tx_borrowed); |
777 | } | 778 | } |
778 | 779 | ||
779 | static __inline__ struct cbq_class * | 780 | static inline struct cbq_class * |
780 | cbq_under_limit(struct cbq_class *cl) | 781 | cbq_under_limit(struct cbq_class *cl) |
781 | { | 782 | { |
782 | struct cbq_sched_data *q = qdisc_priv(cl->qdisc); | 783 | struct cbq_sched_data *q = qdisc_priv(cl->qdisc); |
@@ -792,16 +793,17 @@ cbq_under_limit(struct cbq_class *cl) | |||
792 | 793 | ||
793 | do { | 794 | do { |
794 | /* It is very suspicious place. Now overlimit | 795 | /* It is very suspicious place. Now overlimit |
795 | action is generated for not bounded classes | 796 | * action is generated for not bounded classes |
796 | only if link is completely congested. | 797 | * only if link is completely congested. |
797 | Though it is in agree with ancestor-only paradigm, | 798 | * Though it is in agree with ancestor-only paradigm, |
798 | it looks very stupid. Particularly, | 799 | * it looks very stupid. Particularly, |
799 | it means that this chunk of code will either | 800 | * it means that this chunk of code will either |
800 | never be called or result in strong amplification | 801 | * never be called or result in strong amplification |
801 | of burstiness. Dangerous, silly, and, however, | 802 | * of burstiness. Dangerous, silly, and, however, |
802 | no another solution exists. | 803 | * no another solution exists. |
803 | */ | 804 | */ |
804 | if ((cl = cl->borrow) == NULL) { | 805 | cl = cl->borrow; |
806 | if (!cl) { | ||
805 | this_cl->qstats.overlimits++; | 807 | this_cl->qstats.overlimits++; |
806 | this_cl->overlimit(this_cl); | 808 | this_cl->overlimit(this_cl); |
807 | return NULL; | 809 | return NULL; |
@@ -814,7 +816,7 @@ cbq_under_limit(struct cbq_class *cl) | |||
814 | return cl; | 816 | return cl; |
815 | } | 817 | } |
816 | 818 | ||
817 | static __inline__ struct sk_buff * | 819 | static inline struct sk_buff * |
818 | cbq_dequeue_prio(struct Qdisc *sch, int prio) | 820 | cbq_dequeue_prio(struct Qdisc *sch, int prio) |
819 | { | 821 | { |
820 | struct cbq_sched_data *q = qdisc_priv(sch); | 822 | struct cbq_sched_data *q = qdisc_priv(sch); |
@@ -838,7 +840,7 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio) | |||
838 | 840 | ||
839 | if (cl->deficit <= 0) { | 841 | if (cl->deficit <= 0) { |
840 | /* Class exhausted its allotment per | 842 | /* Class exhausted its allotment per |
841 | this round. Switch to the next one. | 843 | * this round. Switch to the next one. |
842 | */ | 844 | */ |
843 | deficit = 1; | 845 | deficit = 1; |
844 | cl->deficit += cl->quantum; | 846 | cl->deficit += cl->quantum; |
@@ -848,8 +850,8 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio) | |||
848 | skb = cl->q->dequeue(cl->q); | 850 | skb = cl->q->dequeue(cl->q); |
849 | 851 | ||
850 | /* Class did not give us any skb :-( | 852 | /* Class did not give us any skb :-( |
851 | It could occur even if cl->q->q.qlen != 0 | 853 | * It could occur even if cl->q->q.qlen != 0 |
852 | f.e. if cl->q == "tbf" | 854 | * f.e. if cl->q == "tbf" |
853 | */ | 855 | */ |
854 | if (skb == NULL) | 856 | if (skb == NULL) |
855 | goto skip_class; | 857 | goto skip_class; |
@@ -878,7 +880,7 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio) | |||
878 | skip_class: | 880 | skip_class: |
879 | if (cl->q->q.qlen == 0 || prio != cl->cpriority) { | 881 | if (cl->q->q.qlen == 0 || prio != cl->cpriority) { |
880 | /* Class is empty or penalized. | 882 | /* Class is empty or penalized. |
881 | Unlink it from active chain. | 883 | * Unlink it from active chain. |
882 | */ | 884 | */ |
883 | cl_prev->next_alive = cl->next_alive; | 885 | cl_prev->next_alive = cl->next_alive; |
884 | cl->next_alive = NULL; | 886 | cl->next_alive = NULL; |
@@ -917,14 +919,14 @@ next_class: | |||
917 | return NULL; | 919 | return NULL; |
918 | } | 920 | } |
919 | 921 | ||
920 | static __inline__ struct sk_buff * | 922 | static inline struct sk_buff * |
921 | cbq_dequeue_1(struct Qdisc *sch) | 923 | cbq_dequeue_1(struct Qdisc *sch) |
922 | { | 924 | { |
923 | struct cbq_sched_data *q = qdisc_priv(sch); | 925 | struct cbq_sched_data *q = qdisc_priv(sch); |
924 | struct sk_buff *skb; | 926 | struct sk_buff *skb; |
925 | unsigned activemask; | 927 | unsigned int activemask; |
926 | 928 | ||
927 | activemask = q->activemask&0xFF; | 929 | activemask = q->activemask & 0xFF; |
928 | while (activemask) { | 930 | while (activemask) { |
929 | int prio = ffz(~activemask); | 931 | int prio = ffz(~activemask); |
930 | activemask &= ~(1<<prio); | 932 | activemask &= ~(1<<prio); |
@@ -949,11 +951,11 @@ cbq_dequeue(struct Qdisc *sch) | |||
949 | if (q->tx_class) { | 951 | if (q->tx_class) { |
950 | psched_tdiff_t incr2; | 952 | psched_tdiff_t incr2; |
951 | /* Time integrator. We calculate EOS time | 953 | /* Time integrator. We calculate EOS time |
952 | by adding expected packet transmission time. | 954 | * by adding expected packet transmission time. |
953 | If real time is greater, we warp artificial clock, | 955 | * If real time is greater, we warp artificial clock, |
954 | so that: | 956 | * so that: |
955 | 957 | * | |
956 | cbq_time = max(real_time, work); | 958 | * cbq_time = max(real_time, work); |
957 | */ | 959 | */ |
958 | incr2 = L2T(&q->link, q->tx_len); | 960 | incr2 = L2T(&q->link, q->tx_len); |
959 | q->now += incr2; | 961 | q->now += incr2; |
@@ -971,27 +973,27 @@ cbq_dequeue(struct Qdisc *sch) | |||
971 | if (skb) { | 973 | if (skb) { |
972 | qdisc_bstats_update(sch, skb); | 974 | qdisc_bstats_update(sch, skb); |
973 | sch->q.qlen--; | 975 | sch->q.qlen--; |
974 | sch->flags &= ~TCQ_F_THROTTLED; | 976 | qdisc_unthrottled(sch); |
975 | return skb; | 977 | return skb; |
976 | } | 978 | } |
977 | 979 | ||
978 | /* All the classes are overlimit. | 980 | /* All the classes are overlimit. |
979 | 981 | * | |
980 | It is possible, if: | 982 | * It is possible, if: |
981 | 983 | * | |
982 | 1. Scheduler is empty. | 984 | * 1. Scheduler is empty. |
983 | 2. Toplevel cutoff inhibited borrowing. | 985 | * 2. Toplevel cutoff inhibited borrowing. |
984 | 3. Root class is overlimit. | 986 | * 3. Root class is overlimit. |
985 | 987 | * | |
986 | Reset 2d and 3d conditions and retry. | 988 | * Reset 2d and 3d conditions and retry. |
987 | 989 | * | |
988 | Note, that NS and cbq-2.0 are buggy, peeking | 990 | * Note, that NS and cbq-2.0 are buggy, peeking |
989 | an arbitrary class is appropriate for ancestor-only | 991 | * an arbitrary class is appropriate for ancestor-only |
990 | sharing, but not for toplevel algorithm. | 992 | * sharing, but not for toplevel algorithm. |
991 | 993 | * | |
992 | Our version is better, but slower, because it requires | 994 | * Our version is better, but slower, because it requires |
993 | two passes, but it is unavoidable with top-level sharing. | 995 | * two passes, but it is unavoidable with top-level sharing. |
994 | */ | 996 | */ |
995 | 997 | ||
996 | if (q->toplevel == TC_CBQ_MAXLEVEL && | 998 | if (q->toplevel == TC_CBQ_MAXLEVEL && |
997 | q->link.undertime == PSCHED_PASTPERFECT) | 999 | q->link.undertime == PSCHED_PASTPERFECT) |
@@ -1002,7 +1004,8 @@ cbq_dequeue(struct Qdisc *sch) | |||
1002 | } | 1004 | } |
1003 | 1005 | ||
1004 | /* No packets in scheduler or nobody wants to give them to us :-( | 1006 | /* No packets in scheduler or nobody wants to give them to us :-( |
1005 | Sigh... start watchdog timer in the last case. */ | 1007 | * Sigh... start watchdog timer in the last case. |
1008 | */ | ||
1006 | 1009 | ||
1007 | if (sch->q.qlen) { | 1010 | if (sch->q.qlen) { |
1008 | sch->qstats.overlimits++; | 1011 | sch->qstats.overlimits++; |
@@ -1024,13 +1027,14 @@ static void cbq_adjust_levels(struct cbq_class *this) | |||
1024 | int level = 0; | 1027 | int level = 0; |
1025 | struct cbq_class *cl; | 1028 | struct cbq_class *cl; |
1026 | 1029 | ||
1027 | if ((cl = this->children) != NULL) { | 1030 | cl = this->children; |
1031 | if (cl) { | ||
1028 | do { | 1032 | do { |
1029 | if (cl->level > level) | 1033 | if (cl->level > level) |
1030 | level = cl->level; | 1034 | level = cl->level; |
1031 | } while ((cl = cl->sibling) != this->children); | 1035 | } while ((cl = cl->sibling) != this->children); |
1032 | } | 1036 | } |
1033 | this->level = level+1; | 1037 | this->level = level + 1; |
1034 | } while ((this = this->tparent) != NULL); | 1038 | } while ((this = this->tparent) != NULL); |
1035 | } | 1039 | } |
1036 | 1040 | ||
@@ -1046,14 +1050,15 @@ static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio) | |||
1046 | for (h = 0; h < q->clhash.hashsize; h++) { | 1050 | for (h = 0; h < q->clhash.hashsize; h++) { |
1047 | hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) { | 1051 | hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) { |
1048 | /* BUGGGG... Beware! This expression suffer of | 1052 | /* BUGGGG... Beware! This expression suffer of |
1049 | arithmetic overflows! | 1053 | * arithmetic overflows! |
1050 | */ | 1054 | */ |
1051 | if (cl->priority == prio) { | 1055 | if (cl->priority == prio) { |
1052 | cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/ | 1056 | cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/ |
1053 | q->quanta[prio]; | 1057 | q->quanta[prio]; |
1054 | } | 1058 | } |
1055 | if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) { | 1059 | if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) { |
1056 | printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum); | 1060 | pr_warning("CBQ: class %08x has bad quantum==%ld, repaired.\n", |
1061 | cl->common.classid, cl->quantum); | ||
1057 | cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1; | 1062 | cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1; |
1058 | } | 1063 | } |
1059 | } | 1064 | } |
@@ -1064,18 +1069,18 @@ static void cbq_sync_defmap(struct cbq_class *cl) | |||
1064 | { | 1069 | { |
1065 | struct cbq_sched_data *q = qdisc_priv(cl->qdisc); | 1070 | struct cbq_sched_data *q = qdisc_priv(cl->qdisc); |
1066 | struct cbq_class *split = cl->split; | 1071 | struct cbq_class *split = cl->split; |
1067 | unsigned h; | 1072 | unsigned int h; |
1068 | int i; | 1073 | int i; |
1069 | 1074 | ||
1070 | if (split == NULL) | 1075 | if (split == NULL) |
1071 | return; | 1076 | return; |
1072 | 1077 | ||
1073 | for (i=0; i<=TC_PRIO_MAX; i++) { | 1078 | for (i = 0; i <= TC_PRIO_MAX; i++) { |
1074 | if (split->defaults[i] == cl && !(cl->defmap&(1<<i))) | 1079 | if (split->defaults[i] == cl && !(cl->defmap & (1<<i))) |
1075 | split->defaults[i] = NULL; | 1080 | split->defaults[i] = NULL; |
1076 | } | 1081 | } |
1077 | 1082 | ||
1078 | for (i=0; i<=TC_PRIO_MAX; i++) { | 1083 | for (i = 0; i <= TC_PRIO_MAX; i++) { |
1079 | int level = split->level; | 1084 | int level = split->level; |
1080 | 1085 | ||
1081 | if (split->defaults[i]) | 1086 | if (split->defaults[i]) |
@@ -1088,7 +1093,7 @@ static void cbq_sync_defmap(struct cbq_class *cl) | |||
1088 | hlist_for_each_entry(c, n, &q->clhash.hash[h], | 1093 | hlist_for_each_entry(c, n, &q->clhash.hash[h], |
1089 | common.hnode) { | 1094 | common.hnode) { |
1090 | if (c->split == split && c->level < level && | 1095 | if (c->split == split && c->level < level && |
1091 | c->defmap&(1<<i)) { | 1096 | c->defmap & (1<<i)) { |
1092 | split->defaults[i] = c; | 1097 | split->defaults[i] = c; |
1093 | level = c->level; | 1098 | level = c->level; |
1094 | } | 1099 | } |
@@ -1102,7 +1107,8 @@ static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 ma | |||
1102 | struct cbq_class *split = NULL; | 1107 | struct cbq_class *split = NULL; |
1103 | 1108 | ||
1104 | if (splitid == 0) { | 1109 | if (splitid == 0) { |
1105 | if ((split = cl->split) == NULL) | 1110 | split = cl->split; |
1111 | if (!split) | ||
1106 | return; | 1112 | return; |
1107 | splitid = split->common.classid; | 1113 | splitid = split->common.classid; |
1108 | } | 1114 | } |
@@ -1120,9 +1126,9 @@ static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 ma | |||
1120 | cl->defmap = 0; | 1126 | cl->defmap = 0; |
1121 | cbq_sync_defmap(cl); | 1127 | cbq_sync_defmap(cl); |
1122 | cl->split = split; | 1128 | cl->split = split; |
1123 | cl->defmap = def&mask; | 1129 | cl->defmap = def & mask; |
1124 | } else | 1130 | } else |
1125 | cl->defmap = (cl->defmap&~mask)|(def&mask); | 1131 | cl->defmap = (cl->defmap & ~mask) | (def & mask); |
1126 | 1132 | ||
1127 | cbq_sync_defmap(cl); | 1133 | cbq_sync_defmap(cl); |
1128 | } | 1134 | } |
@@ -1135,7 +1141,7 @@ static void cbq_unlink_class(struct cbq_class *this) | |||
1135 | qdisc_class_hash_remove(&q->clhash, &this->common); | 1141 | qdisc_class_hash_remove(&q->clhash, &this->common); |
1136 | 1142 | ||
1137 | if (this->tparent) { | 1143 | if (this->tparent) { |
1138 | clp=&this->sibling; | 1144 | clp = &this->sibling; |
1139 | cl = *clp; | 1145 | cl = *clp; |
1140 | do { | 1146 | do { |
1141 | if (cl == this) { | 1147 | if (cl == this) { |
@@ -1174,7 +1180,7 @@ static void cbq_link_class(struct cbq_class *this) | |||
1174 | } | 1180 | } |
1175 | } | 1181 | } |
1176 | 1182 | ||
1177 | static unsigned int cbq_drop(struct Qdisc* sch) | 1183 | static unsigned int cbq_drop(struct Qdisc *sch) |
1178 | { | 1184 | { |
1179 | struct cbq_sched_data *q = qdisc_priv(sch); | 1185 | struct cbq_sched_data *q = qdisc_priv(sch); |
1180 | struct cbq_class *cl, *cl_head; | 1186 | struct cbq_class *cl, *cl_head; |
@@ -1182,7 +1188,8 @@ static unsigned int cbq_drop(struct Qdisc* sch) | |||
1182 | unsigned int len; | 1188 | unsigned int len; |
1183 | 1189 | ||
1184 | for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) { | 1190 | for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) { |
1185 | if ((cl_head = q->active[prio]) == NULL) | 1191 | cl_head = q->active[prio]; |
1192 | if (!cl_head) | ||
1186 | continue; | 1193 | continue; |
1187 | 1194 | ||
1188 | cl = cl_head; | 1195 | cl = cl_head; |
@@ -1199,13 +1206,13 @@ static unsigned int cbq_drop(struct Qdisc* sch) | |||
1199 | } | 1206 | } |
1200 | 1207 | ||
1201 | static void | 1208 | static void |
1202 | cbq_reset(struct Qdisc* sch) | 1209 | cbq_reset(struct Qdisc *sch) |
1203 | { | 1210 | { |
1204 | struct cbq_sched_data *q = qdisc_priv(sch); | 1211 | struct cbq_sched_data *q = qdisc_priv(sch); |
1205 | struct cbq_class *cl; | 1212 | struct cbq_class *cl; |
1206 | struct hlist_node *n; | 1213 | struct hlist_node *n; |
1207 | int prio; | 1214 | int prio; |
1208 | unsigned h; | 1215 | unsigned int h; |
1209 | 1216 | ||
1210 | q->activemask = 0; | 1217 | q->activemask = 0; |
1211 | q->pmask = 0; | 1218 | q->pmask = 0; |
@@ -1237,21 +1244,21 @@ cbq_reset(struct Qdisc* sch) | |||
1237 | 1244 | ||
1238 | static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss) | 1245 | static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss) |
1239 | { | 1246 | { |
1240 | if (lss->change&TCF_CBQ_LSS_FLAGS) { | 1247 | if (lss->change & TCF_CBQ_LSS_FLAGS) { |
1241 | cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent; | 1248 | cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent; |
1242 | cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent; | 1249 | cl->borrow = (lss->flags & TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent; |
1243 | } | 1250 | } |
1244 | if (lss->change&TCF_CBQ_LSS_EWMA) | 1251 | if (lss->change & TCF_CBQ_LSS_EWMA) |
1245 | cl->ewma_log = lss->ewma_log; | 1252 | cl->ewma_log = lss->ewma_log; |
1246 | if (lss->change&TCF_CBQ_LSS_AVPKT) | 1253 | if (lss->change & TCF_CBQ_LSS_AVPKT) |
1247 | cl->avpkt = lss->avpkt; | 1254 | cl->avpkt = lss->avpkt; |
1248 | if (lss->change&TCF_CBQ_LSS_MINIDLE) | 1255 | if (lss->change & TCF_CBQ_LSS_MINIDLE) |
1249 | cl->minidle = -(long)lss->minidle; | 1256 | cl->minidle = -(long)lss->minidle; |
1250 | if (lss->change&TCF_CBQ_LSS_MAXIDLE) { | 1257 | if (lss->change & TCF_CBQ_LSS_MAXIDLE) { |
1251 | cl->maxidle = lss->maxidle; | 1258 | cl->maxidle = lss->maxidle; |
1252 | cl->avgidle = lss->maxidle; | 1259 | cl->avgidle = lss->maxidle; |
1253 | } | 1260 | } |
1254 | if (lss->change&TCF_CBQ_LSS_OFFTIME) | 1261 | if (lss->change & TCF_CBQ_LSS_OFFTIME) |
1255 | cl->offtime = lss->offtime; | 1262 | cl->offtime = lss->offtime; |
1256 | return 0; | 1263 | return 0; |
1257 | } | 1264 | } |
@@ -1279,10 +1286,10 @@ static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr) | |||
1279 | if (wrr->weight) | 1286 | if (wrr->weight) |
1280 | cl->weight = wrr->weight; | 1287 | cl->weight = wrr->weight; |
1281 | if (wrr->priority) { | 1288 | if (wrr->priority) { |
1282 | cl->priority = wrr->priority-1; | 1289 | cl->priority = wrr->priority - 1; |
1283 | cl->cpriority = cl->priority; | 1290 | cl->cpriority = cl->priority; |
1284 | if (cl->priority >= cl->priority2) | 1291 | if (cl->priority >= cl->priority2) |
1285 | cl->priority2 = TC_CBQ_MAXPRIO-1; | 1292 | cl->priority2 = TC_CBQ_MAXPRIO - 1; |
1286 | } | 1293 | } |
1287 | 1294 | ||
1288 | cbq_addprio(q, cl); | 1295 | cbq_addprio(q, cl); |
@@ -1299,10 +1306,10 @@ static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl) | |||
1299 | cl->overlimit = cbq_ovl_delay; | 1306 | cl->overlimit = cbq_ovl_delay; |
1300 | break; | 1307 | break; |
1301 | case TC_CBQ_OVL_LOWPRIO: | 1308 | case TC_CBQ_OVL_LOWPRIO: |
1302 | if (ovl->priority2-1 >= TC_CBQ_MAXPRIO || | 1309 | if (ovl->priority2 - 1 >= TC_CBQ_MAXPRIO || |
1303 | ovl->priority2-1 <= cl->priority) | 1310 | ovl->priority2 - 1 <= cl->priority) |
1304 | return -EINVAL; | 1311 | return -EINVAL; |
1305 | cl->priority2 = ovl->priority2-1; | 1312 | cl->priority2 = ovl->priority2 - 1; |
1306 | cl->overlimit = cbq_ovl_lowprio; | 1313 | cl->overlimit = cbq_ovl_lowprio; |
1307 | break; | 1314 | break; |
1308 | case TC_CBQ_OVL_DROP: | 1315 | case TC_CBQ_OVL_DROP: |
@@ -1381,9 +1388,9 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) | |||
1381 | if (!q->link.q) | 1388 | if (!q->link.q) |
1382 | q->link.q = &noop_qdisc; | 1389 | q->link.q = &noop_qdisc; |
1383 | 1390 | ||
1384 | q->link.priority = TC_CBQ_MAXPRIO-1; | 1391 | q->link.priority = TC_CBQ_MAXPRIO - 1; |
1385 | q->link.priority2 = TC_CBQ_MAXPRIO-1; | 1392 | q->link.priority2 = TC_CBQ_MAXPRIO - 1; |
1386 | q->link.cpriority = TC_CBQ_MAXPRIO-1; | 1393 | q->link.cpriority = TC_CBQ_MAXPRIO - 1; |
1387 | q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC; | 1394 | q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC; |
1388 | q->link.overlimit = cbq_ovl_classic; | 1395 | q->link.overlimit = cbq_ovl_classic; |
1389 | q->link.allot = psched_mtu(qdisc_dev(sch)); | 1396 | q->link.allot = psched_mtu(qdisc_dev(sch)); |
@@ -1414,7 +1421,7 @@ put_rtab: | |||
1414 | return err; | 1421 | return err; |
1415 | } | 1422 | } |
1416 | 1423 | ||
1417 | static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl) | 1424 | static int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl) |
1418 | { | 1425 | { |
1419 | unsigned char *b = skb_tail_pointer(skb); | 1426 | unsigned char *b = skb_tail_pointer(skb); |
1420 | 1427 | ||
@@ -1426,7 +1433,7 @@ nla_put_failure: | |||
1426 | return -1; | 1433 | return -1; |
1427 | } | 1434 | } |
1428 | 1435 | ||
1429 | static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl) | 1436 | static int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl) |
1430 | { | 1437 | { |
1431 | unsigned char *b = skb_tail_pointer(skb); | 1438 | unsigned char *b = skb_tail_pointer(skb); |
1432 | struct tc_cbq_lssopt opt; | 1439 | struct tc_cbq_lssopt opt; |
@@ -1451,15 +1458,15 @@ nla_put_failure: | |||
1451 | return -1; | 1458 | return -1; |
1452 | } | 1459 | } |
1453 | 1460 | ||
1454 | static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl) | 1461 | static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl) |
1455 | { | 1462 | { |
1456 | unsigned char *b = skb_tail_pointer(skb); | 1463 | unsigned char *b = skb_tail_pointer(skb); |
1457 | struct tc_cbq_wrropt opt; | 1464 | struct tc_cbq_wrropt opt; |
1458 | 1465 | ||
1459 | opt.flags = 0; | 1466 | opt.flags = 0; |
1460 | opt.allot = cl->allot; | 1467 | opt.allot = cl->allot; |
1461 | opt.priority = cl->priority+1; | 1468 | opt.priority = cl->priority + 1; |
1462 | opt.cpriority = cl->cpriority+1; | 1469 | opt.cpriority = cl->cpriority + 1; |
1463 | opt.weight = cl->weight; | 1470 | opt.weight = cl->weight; |
1464 | NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt); | 1471 | NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt); |
1465 | return skb->len; | 1472 | return skb->len; |
@@ -1469,13 +1476,13 @@ nla_put_failure: | |||
1469 | return -1; | 1476 | return -1; |
1470 | } | 1477 | } |
1471 | 1478 | ||
1472 | static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl) | 1479 | static int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl) |
1473 | { | 1480 | { |
1474 | unsigned char *b = skb_tail_pointer(skb); | 1481 | unsigned char *b = skb_tail_pointer(skb); |
1475 | struct tc_cbq_ovl opt; | 1482 | struct tc_cbq_ovl opt; |
1476 | 1483 | ||
1477 | opt.strategy = cl->ovl_strategy; | 1484 | opt.strategy = cl->ovl_strategy; |
1478 | opt.priority2 = cl->priority2+1; | 1485 | opt.priority2 = cl->priority2 + 1; |
1479 | opt.pad = 0; | 1486 | opt.pad = 0; |
1480 | opt.penalty = cl->penalty; | 1487 | opt.penalty = cl->penalty; |
1481 | NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt); | 1488 | NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt); |
@@ -1486,7 +1493,7 @@ nla_put_failure: | |||
1486 | return -1; | 1493 | return -1; |
1487 | } | 1494 | } |
1488 | 1495 | ||
1489 | static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl) | 1496 | static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl) |
1490 | { | 1497 | { |
1491 | unsigned char *b = skb_tail_pointer(skb); | 1498 | unsigned char *b = skb_tail_pointer(skb); |
1492 | struct tc_cbq_fopt opt; | 1499 | struct tc_cbq_fopt opt; |
@@ -1505,7 +1512,7 @@ nla_put_failure: | |||
1505 | } | 1512 | } |
1506 | 1513 | ||
1507 | #ifdef CONFIG_NET_CLS_ACT | 1514 | #ifdef CONFIG_NET_CLS_ACT |
1508 | static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl) | 1515 | static int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl) |
1509 | { | 1516 | { |
1510 | unsigned char *b = skb_tail_pointer(skb); | 1517 | unsigned char *b = skb_tail_pointer(skb); |
1511 | struct tc_cbq_police opt; | 1518 | struct tc_cbq_police opt; |
@@ -1569,7 +1576,7 @@ static int | |||
1569 | cbq_dump_class(struct Qdisc *sch, unsigned long arg, | 1576 | cbq_dump_class(struct Qdisc *sch, unsigned long arg, |
1570 | struct sk_buff *skb, struct tcmsg *tcm) | 1577 | struct sk_buff *skb, struct tcmsg *tcm) |
1571 | { | 1578 | { |
1572 | struct cbq_class *cl = (struct cbq_class*)arg; | 1579 | struct cbq_class *cl = (struct cbq_class *)arg; |
1573 | struct nlattr *nest; | 1580 | struct nlattr *nest; |
1574 | 1581 | ||
1575 | if (cl->tparent) | 1582 | if (cl->tparent) |
@@ -1597,7 +1604,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg, | |||
1597 | struct gnet_dump *d) | 1604 | struct gnet_dump *d) |
1598 | { | 1605 | { |
1599 | struct cbq_sched_data *q = qdisc_priv(sch); | 1606 | struct cbq_sched_data *q = qdisc_priv(sch); |
1600 | struct cbq_class *cl = (struct cbq_class*)arg; | 1607 | struct cbq_class *cl = (struct cbq_class *)arg; |
1601 | 1608 | ||
1602 | cl->qstats.qlen = cl->q->q.qlen; | 1609 | cl->qstats.qlen = cl->q->q.qlen; |
1603 | cl->xstats.avgidle = cl->avgidle; | 1610 | cl->xstats.avgidle = cl->avgidle; |
@@ -1617,7 +1624,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg, | |||
1617 | static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, | 1624 | static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, |
1618 | struct Qdisc **old) | 1625 | struct Qdisc **old) |
1619 | { | 1626 | { |
1620 | struct cbq_class *cl = (struct cbq_class*)arg; | 1627 | struct cbq_class *cl = (struct cbq_class *)arg; |
1621 | 1628 | ||
1622 | if (new == NULL) { | 1629 | if (new == NULL) { |
1623 | new = qdisc_create_dflt(sch->dev_queue, | 1630 | new = qdisc_create_dflt(sch->dev_queue, |
@@ -1640,10 +1647,9 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, | |||
1640 | return 0; | 1647 | return 0; |
1641 | } | 1648 | } |
1642 | 1649 | ||
1643 | static struct Qdisc * | 1650 | static struct Qdisc *cbq_leaf(struct Qdisc *sch, unsigned long arg) |
1644 | cbq_leaf(struct Qdisc *sch, unsigned long arg) | ||
1645 | { | 1651 | { |
1646 | struct cbq_class *cl = (struct cbq_class*)arg; | 1652 | struct cbq_class *cl = (struct cbq_class *)arg; |
1647 | 1653 | ||
1648 | return cl->q; | 1654 | return cl->q; |
1649 | } | 1655 | } |
@@ -1682,13 +1688,12 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl) | |||
1682 | kfree(cl); | 1688 | kfree(cl); |
1683 | } | 1689 | } |
1684 | 1690 | ||
1685 | static void | 1691 | static void cbq_destroy(struct Qdisc *sch) |
1686 | cbq_destroy(struct Qdisc* sch) | ||
1687 | { | 1692 | { |
1688 | struct cbq_sched_data *q = qdisc_priv(sch); | 1693 | struct cbq_sched_data *q = qdisc_priv(sch); |
1689 | struct hlist_node *n, *next; | 1694 | struct hlist_node *n, *next; |
1690 | struct cbq_class *cl; | 1695 | struct cbq_class *cl; |
1691 | unsigned h; | 1696 | unsigned int h; |
1692 | 1697 | ||
1693 | #ifdef CONFIG_NET_CLS_ACT | 1698 | #ifdef CONFIG_NET_CLS_ACT |
1694 | q->rx_class = NULL; | 1699 | q->rx_class = NULL; |
@@ -1712,7 +1717,7 @@ cbq_destroy(struct Qdisc* sch) | |||
1712 | 1717 | ||
1713 | static void cbq_put(struct Qdisc *sch, unsigned long arg) | 1718 | static void cbq_put(struct Qdisc *sch, unsigned long arg) |
1714 | { | 1719 | { |
1715 | struct cbq_class *cl = (struct cbq_class*)arg; | 1720 | struct cbq_class *cl = (struct cbq_class *)arg; |
1716 | 1721 | ||
1717 | if (--cl->refcnt == 0) { | 1722 | if (--cl->refcnt == 0) { |
1718 | #ifdef CONFIG_NET_CLS_ACT | 1723 | #ifdef CONFIG_NET_CLS_ACT |
@@ -1735,7 +1740,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t | |||
1735 | { | 1740 | { |
1736 | int err; | 1741 | int err; |
1737 | struct cbq_sched_data *q = qdisc_priv(sch); | 1742 | struct cbq_sched_data *q = qdisc_priv(sch); |
1738 | struct cbq_class *cl = (struct cbq_class*)*arg; | 1743 | struct cbq_class *cl = (struct cbq_class *)*arg; |
1739 | struct nlattr *opt = tca[TCA_OPTIONS]; | 1744 | struct nlattr *opt = tca[TCA_OPTIONS]; |
1740 | struct nlattr *tb[TCA_CBQ_MAX + 1]; | 1745 | struct nlattr *tb[TCA_CBQ_MAX + 1]; |
1741 | struct cbq_class *parent; | 1746 | struct cbq_class *parent; |
@@ -1827,13 +1832,14 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t | |||
1827 | 1832 | ||
1828 | if (classid) { | 1833 | if (classid) { |
1829 | err = -EINVAL; | 1834 | err = -EINVAL; |
1830 | if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid)) | 1835 | if (TC_H_MAJ(classid ^ sch->handle) || |
1836 | cbq_class_lookup(q, classid)) | ||
1831 | goto failure; | 1837 | goto failure; |
1832 | } else { | 1838 | } else { |
1833 | int i; | 1839 | int i; |
1834 | classid = TC_H_MAKE(sch->handle,0x8000); | 1840 | classid = TC_H_MAKE(sch->handle, 0x8000); |
1835 | 1841 | ||
1836 | for (i=0; i<0x8000; i++) { | 1842 | for (i = 0; i < 0x8000; i++) { |
1837 | if (++q->hgenerator >= 0x8000) | 1843 | if (++q->hgenerator >= 0x8000) |
1838 | q->hgenerator = 1; | 1844 | q->hgenerator = 1; |
1839 | if (cbq_class_lookup(q, classid|q->hgenerator) == NULL) | 1845 | if (cbq_class_lookup(q, classid|q->hgenerator) == NULL) |
@@ -1890,11 +1896,11 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t | |||
1890 | cl->minidle = -0x7FFFFFFF; | 1896 | cl->minidle = -0x7FFFFFFF; |
1891 | cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT])); | 1897 | cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT])); |
1892 | cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT])); | 1898 | cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT])); |
1893 | if (cl->ewma_log==0) | 1899 | if (cl->ewma_log == 0) |
1894 | cl->ewma_log = q->link.ewma_log; | 1900 | cl->ewma_log = q->link.ewma_log; |
1895 | if (cl->maxidle==0) | 1901 | if (cl->maxidle == 0) |
1896 | cl->maxidle = q->link.maxidle; | 1902 | cl->maxidle = q->link.maxidle; |
1897 | if (cl->avpkt==0) | 1903 | if (cl->avpkt == 0) |
1898 | cl->avpkt = q->link.avpkt; | 1904 | cl->avpkt = q->link.avpkt; |
1899 | cl->overlimit = cbq_ovl_classic; | 1905 | cl->overlimit = cbq_ovl_classic; |
1900 | if (tb[TCA_CBQ_OVL_STRATEGY]) | 1906 | if (tb[TCA_CBQ_OVL_STRATEGY]) |
@@ -1920,7 +1926,7 @@ failure: | |||
1920 | static int cbq_delete(struct Qdisc *sch, unsigned long arg) | 1926 | static int cbq_delete(struct Qdisc *sch, unsigned long arg) |
1921 | { | 1927 | { |
1922 | struct cbq_sched_data *q = qdisc_priv(sch); | 1928 | struct cbq_sched_data *q = qdisc_priv(sch); |
1923 | struct cbq_class *cl = (struct cbq_class*)arg; | 1929 | struct cbq_class *cl = (struct cbq_class *)arg; |
1924 | unsigned int qlen; | 1930 | unsigned int qlen; |
1925 | 1931 | ||
1926 | if (cl->filters || cl->children || cl == &q->link) | 1932 | if (cl->filters || cl->children || cl == &q->link) |
@@ -1978,7 +1984,7 @@ static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent, | |||
1978 | u32 classid) | 1984 | u32 classid) |
1979 | { | 1985 | { |
1980 | struct cbq_sched_data *q = qdisc_priv(sch); | 1986 | struct cbq_sched_data *q = qdisc_priv(sch); |
1981 | struct cbq_class *p = (struct cbq_class*)parent; | 1987 | struct cbq_class *p = (struct cbq_class *)parent; |
1982 | struct cbq_class *cl = cbq_class_lookup(q, classid); | 1988 | struct cbq_class *cl = cbq_class_lookup(q, classid); |
1983 | 1989 | ||
1984 | if (cl) { | 1990 | if (cl) { |
@@ -1992,7 +1998,7 @@ static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent, | |||
1992 | 1998 | ||
1993 | static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg) | 1999 | static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg) |
1994 | { | 2000 | { |
1995 | struct cbq_class *cl = (struct cbq_class*)arg; | 2001 | struct cbq_class *cl = (struct cbq_class *)arg; |
1996 | 2002 | ||
1997 | cl->filters--; | 2003 | cl->filters--; |
1998 | } | 2004 | } |
@@ -2002,7 +2008,7 @@ static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg) | |||
2002 | struct cbq_sched_data *q = qdisc_priv(sch); | 2008 | struct cbq_sched_data *q = qdisc_priv(sch); |
2003 | struct cbq_class *cl; | 2009 | struct cbq_class *cl; |
2004 | struct hlist_node *n; | 2010 | struct hlist_node *n; |
2005 | unsigned h; | 2011 | unsigned int h; |
2006 | 2012 | ||
2007 | if (arg->stop) | 2013 | if (arg->stop) |
2008 | return; | 2014 | return; |
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 0f7bf3fdfea5..2c790204d042 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c | |||
@@ -137,10 +137,10 @@ static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent, | |||
137 | mask = nla_get_u8(tb[TCA_DSMARK_MASK]); | 137 | mask = nla_get_u8(tb[TCA_DSMARK_MASK]); |
138 | 138 | ||
139 | if (tb[TCA_DSMARK_VALUE]) | 139 | if (tb[TCA_DSMARK_VALUE]) |
140 | p->value[*arg-1] = nla_get_u8(tb[TCA_DSMARK_VALUE]); | 140 | p->value[*arg - 1] = nla_get_u8(tb[TCA_DSMARK_VALUE]); |
141 | 141 | ||
142 | if (tb[TCA_DSMARK_MASK]) | 142 | if (tb[TCA_DSMARK_MASK]) |
143 | p->mask[*arg-1] = mask; | 143 | p->mask[*arg - 1] = mask; |
144 | 144 | ||
145 | err = 0; | 145 | err = 0; |
146 | 146 | ||
@@ -155,8 +155,8 @@ static int dsmark_delete(struct Qdisc *sch, unsigned long arg) | |||
155 | if (!dsmark_valid_index(p, arg)) | 155 | if (!dsmark_valid_index(p, arg)) |
156 | return -EINVAL; | 156 | return -EINVAL; |
157 | 157 | ||
158 | p->mask[arg-1] = 0xff; | 158 | p->mask[arg - 1] = 0xff; |
159 | p->value[arg-1] = 0; | 159 | p->value[arg - 1] = 0; |
160 | 160 | ||
161 | return 0; | 161 | return 0; |
162 | } | 162 | } |
@@ -175,7 +175,7 @@ static void dsmark_walk(struct Qdisc *sch, struct qdisc_walker *walker) | |||
175 | if (p->mask[i] == 0xff && !p->value[i]) | 175 | if (p->mask[i] == 0xff && !p->value[i]) |
176 | goto ignore; | 176 | goto ignore; |
177 | if (walker->count >= walker->skip) { | 177 | if (walker->count >= walker->skip) { |
178 | if (walker->fn(sch, i+1, walker) < 0) { | 178 | if (walker->fn(sch, i + 1, walker) < 0) { |
179 | walker->stop = 1; | 179 | walker->stop = 1; |
180 | break; | 180 | break; |
181 | } | 181 | } |
@@ -304,9 +304,8 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) | |||
304 | * and don't need yet another qdisc as a bypass. | 304 | * and don't need yet another qdisc as a bypass. |
305 | */ | 305 | */ |
306 | if (p->mask[index] != 0xff || p->value[index]) | 306 | if (p->mask[index] != 0xff || p->value[index]) |
307 | printk(KERN_WARNING | 307 | pr_warning("dsmark_dequeue: unsupported protocol %d\n", |
308 | "dsmark_dequeue: unsupported protocol %d\n", | 308 | ntohs(skb->protocol)); |
309 | ntohs(skb->protocol)); | ||
310 | break; | 309 | break; |
311 | } | 310 | } |
312 | 311 | ||
@@ -424,14 +423,14 @@ static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl, | |||
424 | if (!dsmark_valid_index(p, cl)) | 423 | if (!dsmark_valid_index(p, cl)) |
425 | return -EINVAL; | 424 | return -EINVAL; |
426 | 425 | ||
427 | tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl-1); | 426 | tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl - 1); |
428 | tcm->tcm_info = p->q->handle; | 427 | tcm->tcm_info = p->q->handle; |
429 | 428 | ||
430 | opts = nla_nest_start(skb, TCA_OPTIONS); | 429 | opts = nla_nest_start(skb, TCA_OPTIONS); |
431 | if (opts == NULL) | 430 | if (opts == NULL) |
432 | goto nla_put_failure; | 431 | goto nla_put_failure; |
433 | NLA_PUT_U8(skb, TCA_DSMARK_MASK, p->mask[cl-1]); | 432 | NLA_PUT_U8(skb, TCA_DSMARK_MASK, p->mask[cl - 1]); |
434 | NLA_PUT_U8(skb, TCA_DSMARK_VALUE, p->value[cl-1]); | 433 | NLA_PUT_U8(skb, TCA_DSMARK_VALUE, p->value[cl - 1]); |
435 | 434 | ||
436 | return nla_nest_end(skb, opts); | 435 | return nla_nest_end(skb, opts); |
437 | 436 | ||
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index d468b479aa93..be33f9ddf9dd 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c | |||
@@ -19,12 +19,11 @@ | |||
19 | 19 | ||
20 | /* 1 band FIFO pseudo-"scheduler" */ | 20 | /* 1 band FIFO pseudo-"scheduler" */ |
21 | 21 | ||
22 | struct fifo_sched_data | 22 | struct fifo_sched_data { |
23 | { | ||
24 | u32 limit; | 23 | u32 limit; |
25 | }; | 24 | }; |
26 | 25 | ||
27 | static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch) | 26 | static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch) |
28 | { | 27 | { |
29 | struct fifo_sched_data *q = qdisc_priv(sch); | 28 | struct fifo_sched_data *q = qdisc_priv(sch); |
30 | 29 | ||
@@ -34,7 +33,7 @@ static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch) | |||
34 | return qdisc_reshape_fail(skb, sch); | 33 | return qdisc_reshape_fail(skb, sch); |
35 | } | 34 | } |
36 | 35 | ||
37 | static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch) | 36 | static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch) |
38 | { | 37 | { |
39 | struct fifo_sched_data *q = qdisc_priv(sch); | 38 | struct fifo_sched_data *q = qdisc_priv(sch); |
40 | 39 | ||
@@ -44,7 +43,7 @@ static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch) | |||
44 | return qdisc_reshape_fail(skb, sch); | 43 | return qdisc_reshape_fail(skb, sch); |
45 | } | 44 | } |
46 | 45 | ||
47 | static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc* sch) | 46 | static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch) |
48 | { | 47 | { |
49 | struct fifo_sched_data *q = qdisc_priv(sch); | 48 | struct fifo_sched_data *q = qdisc_priv(sch); |
50 | 49 | ||
@@ -62,11 +61,13 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc* sch) | |||
62 | static int fifo_init(struct Qdisc *sch, struct nlattr *opt) | 61 | static int fifo_init(struct Qdisc *sch, struct nlattr *opt) |
63 | { | 62 | { |
64 | struct fifo_sched_data *q = qdisc_priv(sch); | 63 | struct fifo_sched_data *q = qdisc_priv(sch); |
64 | bool bypass; | ||
65 | bool is_bfifo = sch->ops == &bfifo_qdisc_ops; | ||
65 | 66 | ||
66 | if (opt == NULL) { | 67 | if (opt == NULL) { |
67 | u32 limit = qdisc_dev(sch)->tx_queue_len ? : 1; | 68 | u32 limit = qdisc_dev(sch)->tx_queue_len ? : 1; |
68 | 69 | ||
69 | if (sch->ops == &bfifo_qdisc_ops) | 70 | if (is_bfifo) |
70 | limit *= psched_mtu(qdisc_dev(sch)); | 71 | limit *= psched_mtu(qdisc_dev(sch)); |
71 | 72 | ||
72 | q->limit = limit; | 73 | q->limit = limit; |
@@ -79,6 +80,15 @@ static int fifo_init(struct Qdisc *sch, struct nlattr *opt) | |||
79 | q->limit = ctl->limit; | 80 | q->limit = ctl->limit; |
80 | } | 81 | } |
81 | 82 | ||
83 | if (is_bfifo) | ||
84 | bypass = q->limit >= psched_mtu(qdisc_dev(sch)); | ||
85 | else | ||
86 | bypass = q->limit >= 1; | ||
87 | |||
88 | if (bypass) | ||
89 | sch->flags |= TCQ_F_CAN_BYPASS; | ||
90 | else | ||
91 | sch->flags &= ~TCQ_F_CAN_BYPASS; | ||
82 | return 0; | 92 | return 0; |
83 | } | 93 | } |
84 | 94 | ||
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 34dc598440a2..0da09d508737 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c | |||
@@ -87,8 +87,8 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, | |||
87 | */ | 87 | */ |
88 | kfree_skb(skb); | 88 | kfree_skb(skb); |
89 | if (net_ratelimit()) | 89 | if (net_ratelimit()) |
90 | printk(KERN_WARNING "Dead loop on netdevice %s, " | 90 | pr_warning("Dead loop on netdevice %s, fix it urgently!\n", |
91 | "fix it urgently!\n", dev_queue->dev->name); | 91 | dev_queue->dev->name); |
92 | ret = qdisc_qlen(q); | 92 | ret = qdisc_qlen(q); |
93 | } else { | 93 | } else { |
94 | /* | 94 | /* |
@@ -137,8 +137,8 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, | |||
137 | } else { | 137 | } else { |
138 | /* Driver returned NETDEV_TX_BUSY - requeue skb */ | 138 | /* Driver returned NETDEV_TX_BUSY - requeue skb */ |
139 | if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit())) | 139 | if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit())) |
140 | printk(KERN_WARNING "BUG %s code %d qlen %d\n", | 140 | pr_warning("BUG %s code %d qlen %d\n", |
141 | dev->name, ret, q->q.qlen); | 141 | dev->name, ret, q->q.qlen); |
142 | 142 | ||
143 | ret = dev_requeue_skb(skb, q); | 143 | ret = dev_requeue_skb(skb, q); |
144 | } | 144 | } |
@@ -412,8 +412,9 @@ static struct Qdisc noqueue_qdisc = { | |||
412 | }; | 412 | }; |
413 | 413 | ||
414 | 414 | ||
415 | static const u8 prio2band[TC_PRIO_MAX+1] = | 415 | static const u8 prio2band[TC_PRIO_MAX + 1] = { |
416 | { 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 }; | 416 | 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 |
417 | }; | ||
417 | 418 | ||
418 | /* 3-band FIFO queue: old style, but should be a bit faster than | 419 | /* 3-band FIFO queue: old style, but should be a bit faster than |
419 | generic prio+fifo combination. | 420 | generic prio+fifo combination. |
@@ -445,7 +446,7 @@ static inline struct sk_buff_head *band2list(struct pfifo_fast_priv *priv, | |||
445 | return priv->q + band; | 446 | return priv->q + band; |
446 | } | 447 | } |
447 | 448 | ||
448 | static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc) | 449 | static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc) |
449 | { | 450 | { |
450 | if (skb_queue_len(&qdisc->q) < qdisc_dev(qdisc)->tx_queue_len) { | 451 | if (skb_queue_len(&qdisc->q) < qdisc_dev(qdisc)->tx_queue_len) { |
451 | int band = prio2band[skb->priority & TC_PRIO_MAX]; | 452 | int band = prio2band[skb->priority & TC_PRIO_MAX]; |
@@ -460,7 +461,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc) | |||
460 | return qdisc_drop(skb, qdisc); | 461 | return qdisc_drop(skb, qdisc); |
461 | } | 462 | } |
462 | 463 | ||
463 | static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc) | 464 | static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc) |
464 | { | 465 | { |
465 | struct pfifo_fast_priv *priv = qdisc_priv(qdisc); | 466 | struct pfifo_fast_priv *priv = qdisc_priv(qdisc); |
466 | int band = bitmap2band[priv->bitmap]; | 467 | int band = bitmap2band[priv->bitmap]; |
@@ -479,7 +480,7 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc) | |||
479 | return NULL; | 480 | return NULL; |
480 | } | 481 | } |
481 | 482 | ||
482 | static struct sk_buff *pfifo_fast_peek(struct Qdisc* qdisc) | 483 | static struct sk_buff *pfifo_fast_peek(struct Qdisc *qdisc) |
483 | { | 484 | { |
484 | struct pfifo_fast_priv *priv = qdisc_priv(qdisc); | 485 | struct pfifo_fast_priv *priv = qdisc_priv(qdisc); |
485 | int band = bitmap2band[priv->bitmap]; | 486 | int band = bitmap2band[priv->bitmap]; |
@@ -493,7 +494,7 @@ static struct sk_buff *pfifo_fast_peek(struct Qdisc* qdisc) | |||
493 | return NULL; | 494 | return NULL; |
494 | } | 495 | } |
495 | 496 | ||
496 | static void pfifo_fast_reset(struct Qdisc* qdisc) | 497 | static void pfifo_fast_reset(struct Qdisc *qdisc) |
497 | { | 498 | { |
498 | int prio; | 499 | int prio; |
499 | struct pfifo_fast_priv *priv = qdisc_priv(qdisc); | 500 | struct pfifo_fast_priv *priv = qdisc_priv(qdisc); |
@@ -510,7 +511,7 @@ static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb) | |||
510 | { | 511 | { |
511 | struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS }; | 512 | struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS }; |
512 | 513 | ||
513 | memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1); | 514 | memcpy(&opt.priomap, prio2band, TC_PRIO_MAX + 1); |
514 | NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); | 515 | NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); |
515 | return skb->len; | 516 | return skb->len; |
516 | 517 | ||
@@ -526,6 +527,8 @@ static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt) | |||
526 | for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) | 527 | for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) |
527 | skb_queue_head_init(band2list(priv, prio)); | 528 | skb_queue_head_init(band2list(priv, prio)); |
528 | 529 | ||
530 | /* Can by-pass the queue discipline */ | ||
531 | qdisc->flags |= TCQ_F_CAN_BYPASS; | ||
529 | return 0; | 532 | return 0; |
530 | } | 533 | } |
531 | 534 | ||
@@ -540,6 +543,7 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = { | |||
540 | .dump = pfifo_fast_dump, | 543 | .dump = pfifo_fast_dump, |
541 | .owner = THIS_MODULE, | 544 | .owner = THIS_MODULE, |
542 | }; | 545 | }; |
546 | EXPORT_SYMBOL(pfifo_fast_ops); | ||
543 | 547 | ||
544 | struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, | 548 | struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, |
545 | struct Qdisc_ops *ops) | 549 | struct Qdisc_ops *ops) |
@@ -630,7 +634,7 @@ void qdisc_destroy(struct Qdisc *qdisc) | |||
630 | #ifdef CONFIG_NET_SCHED | 634 | #ifdef CONFIG_NET_SCHED |
631 | qdisc_list_del(qdisc); | 635 | qdisc_list_del(qdisc); |
632 | 636 | ||
633 | qdisc_put_stab(qdisc->stab); | 637 | qdisc_put_stab(rtnl_dereference(qdisc->stab)); |
634 | #endif | 638 | #endif |
635 | gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); | 639 | gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); |
636 | if (ops->reset) | 640 | if (ops->reset) |
@@ -674,25 +678,21 @@ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, | |||
674 | 678 | ||
675 | return oqdisc; | 679 | return oqdisc; |
676 | } | 680 | } |
681 | EXPORT_SYMBOL(dev_graft_qdisc); | ||
677 | 682 | ||
678 | static void attach_one_default_qdisc(struct net_device *dev, | 683 | static void attach_one_default_qdisc(struct net_device *dev, |
679 | struct netdev_queue *dev_queue, | 684 | struct netdev_queue *dev_queue, |
680 | void *_unused) | 685 | void *_unused) |
681 | { | 686 | { |
682 | struct Qdisc *qdisc; | 687 | struct Qdisc *qdisc = &noqueue_qdisc; |
683 | 688 | ||
684 | if (dev->tx_queue_len) { | 689 | if (dev->tx_queue_len) { |
685 | qdisc = qdisc_create_dflt(dev_queue, | 690 | qdisc = qdisc_create_dflt(dev_queue, |
686 | &pfifo_fast_ops, TC_H_ROOT); | 691 | &pfifo_fast_ops, TC_H_ROOT); |
687 | if (!qdisc) { | 692 | if (!qdisc) { |
688 | printk(KERN_INFO "%s: activation failed\n", dev->name); | 693 | netdev_info(dev, "activation failed\n"); |
689 | return; | 694 | return; |
690 | } | 695 | } |
691 | |||
692 | /* Can by-pass the queue discipline for default qdisc */ | ||
693 | qdisc->flags |= TCQ_F_CAN_BYPASS; | ||
694 | } else { | ||
695 | qdisc = &noqueue_qdisc; | ||
696 | } | 696 | } |
697 | dev_queue->qdisc_sleeping = qdisc; | 697 | dev_queue->qdisc_sleeping = qdisc; |
698 | } | 698 | } |
@@ -761,6 +761,7 @@ void dev_activate(struct net_device *dev) | |||
761 | dev_watchdog_up(dev); | 761 | dev_watchdog_up(dev); |
762 | } | 762 | } |
763 | } | 763 | } |
764 | EXPORT_SYMBOL(dev_activate); | ||
764 | 765 | ||
765 | static void dev_deactivate_queue(struct net_device *dev, | 766 | static void dev_deactivate_queue(struct net_device *dev, |
766 | struct netdev_queue *dev_queue, | 767 | struct netdev_queue *dev_queue, |
@@ -840,6 +841,7 @@ void dev_deactivate(struct net_device *dev) | |||
840 | list_add(&dev->unreg_list, &single); | 841 | list_add(&dev->unreg_list, &single); |
841 | dev_deactivate_many(&single); | 842 | dev_deactivate_many(&single); |
842 | } | 843 | } |
844 | EXPORT_SYMBOL(dev_deactivate); | ||
843 | 845 | ||
844 | static void dev_init_scheduler_queue(struct net_device *dev, | 846 | static void dev_init_scheduler_queue(struct net_device *dev, |
845 | struct netdev_queue *dev_queue, | 847 | struct netdev_queue *dev_queue, |
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 51dcc2aa5c92..b9493a09a870 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c | |||
@@ -32,8 +32,7 @@ | |||
32 | struct gred_sched_data; | 32 | struct gred_sched_data; |
33 | struct gred_sched; | 33 | struct gred_sched; |
34 | 34 | ||
35 | struct gred_sched_data | 35 | struct gred_sched_data { |
36 | { | ||
37 | u32 limit; /* HARD maximal queue length */ | 36 | u32 limit; /* HARD maximal queue length */ |
38 | u32 DP; /* the drop pramaters */ | 37 | u32 DP; /* the drop pramaters */ |
39 | u32 bytesin; /* bytes seen on virtualQ so far*/ | 38 | u32 bytesin; /* bytes seen on virtualQ so far*/ |
@@ -50,8 +49,7 @@ enum { | |||
50 | GRED_RIO_MODE, | 49 | GRED_RIO_MODE, |
51 | }; | 50 | }; |
52 | 51 | ||
53 | struct gred_sched | 52 | struct gred_sched { |
54 | { | ||
55 | struct gred_sched_data *tab[MAX_DPs]; | 53 | struct gred_sched_data *tab[MAX_DPs]; |
56 | unsigned long flags; | 54 | unsigned long flags; |
57 | u32 red_flags; | 55 | u32 red_flags; |
@@ -150,17 +148,18 @@ static inline int gred_use_harddrop(struct gred_sched *t) | |||
150 | return t->red_flags & TC_RED_HARDDROP; | 148 | return t->red_flags & TC_RED_HARDDROP; |
151 | } | 149 | } |
152 | 150 | ||
153 | static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch) | 151 | static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch) |
154 | { | 152 | { |
155 | struct gred_sched_data *q=NULL; | 153 | struct gred_sched_data *q = NULL; |
156 | struct gred_sched *t= qdisc_priv(sch); | 154 | struct gred_sched *t = qdisc_priv(sch); |
157 | unsigned long qavg = 0; | 155 | unsigned long qavg = 0; |
158 | u16 dp = tc_index_to_dp(skb); | 156 | u16 dp = tc_index_to_dp(skb); |
159 | 157 | ||
160 | if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { | 158 | if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { |
161 | dp = t->def; | 159 | dp = t->def; |
162 | 160 | ||
163 | if ((q = t->tab[dp]) == NULL) { | 161 | q = t->tab[dp]; |
162 | if (!q) { | ||
164 | /* Pass through packets not assigned to a DP | 163 | /* Pass through packets not assigned to a DP |
165 | * if no default DP has been configured. This | 164 | * if no default DP has been configured. This |
166 | * allows for DP flows to be left untouched. | 165 | * allows for DP flows to be left untouched. |
@@ -183,7 +182,7 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch) | |||
183 | for (i = 0; i < t->DPs; i++) { | 182 | for (i = 0; i < t->DPs; i++) { |
184 | if (t->tab[i] && t->tab[i]->prio < q->prio && | 183 | if (t->tab[i] && t->tab[i]->prio < q->prio && |
185 | !red_is_idling(&t->tab[i]->parms)) | 184 | !red_is_idling(&t->tab[i]->parms)) |
186 | qavg +=t->tab[i]->parms.qavg; | 185 | qavg += t->tab[i]->parms.qavg; |
187 | } | 186 | } |
188 | 187 | ||
189 | } | 188 | } |
@@ -203,28 +202,28 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch) | |||
203 | gred_store_wred_set(t, q); | 202 | gred_store_wred_set(t, q); |
204 | 203 | ||
205 | switch (red_action(&q->parms, q->parms.qavg + qavg)) { | 204 | switch (red_action(&q->parms, q->parms.qavg + qavg)) { |
206 | case RED_DONT_MARK: | 205 | case RED_DONT_MARK: |
207 | break; | 206 | break; |
208 | 207 | ||
209 | case RED_PROB_MARK: | 208 | case RED_PROB_MARK: |
210 | sch->qstats.overlimits++; | 209 | sch->qstats.overlimits++; |
211 | if (!gred_use_ecn(t) || !INET_ECN_set_ce(skb)) { | 210 | if (!gred_use_ecn(t) || !INET_ECN_set_ce(skb)) { |
212 | q->stats.prob_drop++; | 211 | q->stats.prob_drop++; |
213 | goto congestion_drop; | 212 | goto congestion_drop; |
214 | } | 213 | } |
215 | 214 | ||
216 | q->stats.prob_mark++; | 215 | q->stats.prob_mark++; |
217 | break; | 216 | break; |
218 | 217 | ||
219 | case RED_HARD_MARK: | 218 | case RED_HARD_MARK: |
220 | sch->qstats.overlimits++; | 219 | sch->qstats.overlimits++; |
221 | if (gred_use_harddrop(t) || !gred_use_ecn(t) || | 220 | if (gred_use_harddrop(t) || !gred_use_ecn(t) || |
222 | !INET_ECN_set_ce(skb)) { | 221 | !INET_ECN_set_ce(skb)) { |
223 | q->stats.forced_drop++; | 222 | q->stats.forced_drop++; |
224 | goto congestion_drop; | 223 | goto congestion_drop; |
225 | } | 224 | } |
226 | q->stats.forced_mark++; | 225 | q->stats.forced_mark++; |
227 | break; | 226 | break; |
228 | } | 227 | } |
229 | 228 | ||
230 | if (q->backlog + qdisc_pkt_len(skb) <= q->limit) { | 229 | if (q->backlog + qdisc_pkt_len(skb) <= q->limit) { |
@@ -241,7 +240,7 @@ congestion_drop: | |||
241 | return NET_XMIT_CN; | 240 | return NET_XMIT_CN; |
242 | } | 241 | } |
243 | 242 | ||
244 | static struct sk_buff *gred_dequeue(struct Qdisc* sch) | 243 | static struct sk_buff *gred_dequeue(struct Qdisc *sch) |
245 | { | 244 | { |
246 | struct sk_buff *skb; | 245 | struct sk_buff *skb; |
247 | struct gred_sched *t = qdisc_priv(sch); | 246 | struct gred_sched *t = qdisc_priv(sch); |
@@ -254,9 +253,9 @@ static struct sk_buff *gred_dequeue(struct Qdisc* sch) | |||
254 | 253 | ||
255 | if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { | 254 | if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { |
256 | if (net_ratelimit()) | 255 | if (net_ratelimit()) |
257 | printk(KERN_WARNING "GRED: Unable to relocate " | 256 | pr_warning("GRED: Unable to relocate VQ 0x%x " |
258 | "VQ 0x%x after dequeue, screwing up " | 257 | "after dequeue, screwing up " |
259 | "backlog.\n", tc_index_to_dp(skb)); | 258 | "backlog.\n", tc_index_to_dp(skb)); |
260 | } else { | 259 | } else { |
261 | q->backlog -= qdisc_pkt_len(skb); | 260 | q->backlog -= qdisc_pkt_len(skb); |
262 | 261 | ||
@@ -273,7 +272,7 @@ static struct sk_buff *gred_dequeue(struct Qdisc* sch) | |||
273 | return NULL; | 272 | return NULL; |
274 | } | 273 | } |
275 | 274 | ||
276 | static unsigned int gred_drop(struct Qdisc* sch) | 275 | static unsigned int gred_drop(struct Qdisc *sch) |
277 | { | 276 | { |
278 | struct sk_buff *skb; | 277 | struct sk_buff *skb; |
279 | struct gred_sched *t = qdisc_priv(sch); | 278 | struct gred_sched *t = qdisc_priv(sch); |
@@ -286,9 +285,9 @@ static unsigned int gred_drop(struct Qdisc* sch) | |||
286 | 285 | ||
287 | if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { | 286 | if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { |
288 | if (net_ratelimit()) | 287 | if (net_ratelimit()) |
289 | printk(KERN_WARNING "GRED: Unable to relocate " | 288 | pr_warning("GRED: Unable to relocate VQ 0x%x " |
290 | "VQ 0x%x while dropping, screwing up " | 289 | "while dropping, screwing up " |
291 | "backlog.\n", tc_index_to_dp(skb)); | 290 | "backlog.\n", tc_index_to_dp(skb)); |
292 | } else { | 291 | } else { |
293 | q->backlog -= len; | 292 | q->backlog -= len; |
294 | q->stats.other++; | 293 | q->stats.other++; |
@@ -308,7 +307,7 @@ static unsigned int gred_drop(struct Qdisc* sch) | |||
308 | 307 | ||
309 | } | 308 | } |
310 | 309 | ||
311 | static void gred_reset(struct Qdisc* sch) | 310 | static void gred_reset(struct Qdisc *sch) |
312 | { | 311 | { |
313 | int i; | 312 | int i; |
314 | struct gred_sched *t = qdisc_priv(sch); | 313 | struct gred_sched *t = qdisc_priv(sch); |
@@ -369,8 +368,8 @@ static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps) | |||
369 | 368 | ||
370 | for (i = table->DPs; i < MAX_DPs; i++) { | 369 | for (i = table->DPs; i < MAX_DPs; i++) { |
371 | if (table->tab[i]) { | 370 | if (table->tab[i]) { |
372 | printk(KERN_WARNING "GRED: Warning: Destroying " | 371 | pr_warning("GRED: Warning: Destroying " |
373 | "shadowed VQ 0x%x\n", i); | 372 | "shadowed VQ 0x%x\n", i); |
374 | gred_destroy_vq(table->tab[i]); | 373 | gred_destroy_vq(table->tab[i]); |
375 | table->tab[i] = NULL; | 374 | table->tab[i] = NULL; |
376 | } | 375 | } |
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 14a799de1c35..6488e6425652 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c | |||
@@ -81,8 +81,7 @@ | |||
81 | * that are expensive on 32-bit architectures. | 81 | * that are expensive on 32-bit architectures. |
82 | */ | 82 | */ |
83 | 83 | ||
84 | struct internal_sc | 84 | struct internal_sc { |
85 | { | ||
86 | u64 sm1; /* scaled slope of the 1st segment */ | 85 | u64 sm1; /* scaled slope of the 1st segment */ |
87 | u64 ism1; /* scaled inverse-slope of the 1st segment */ | 86 | u64 ism1; /* scaled inverse-slope of the 1st segment */ |
88 | u64 dx; /* the x-projection of the 1st segment */ | 87 | u64 dx; /* the x-projection of the 1st segment */ |
@@ -92,8 +91,7 @@ struct internal_sc | |||
92 | }; | 91 | }; |
93 | 92 | ||
94 | /* runtime service curve */ | 93 | /* runtime service curve */ |
95 | struct runtime_sc | 94 | struct runtime_sc { |
96 | { | ||
97 | u64 x; /* current starting position on x-axis */ | 95 | u64 x; /* current starting position on x-axis */ |
98 | u64 y; /* current starting position on y-axis */ | 96 | u64 y; /* current starting position on y-axis */ |
99 | u64 sm1; /* scaled slope of the 1st segment */ | 97 | u64 sm1; /* scaled slope of the 1st segment */ |
@@ -104,15 +102,13 @@ struct runtime_sc | |||
104 | u64 ism2; /* scaled inverse-slope of the 2nd segment */ | 102 | u64 ism2; /* scaled inverse-slope of the 2nd segment */ |
105 | }; | 103 | }; |
106 | 104 | ||
107 | enum hfsc_class_flags | 105 | enum hfsc_class_flags { |
108 | { | ||
109 | HFSC_RSC = 0x1, | 106 | HFSC_RSC = 0x1, |
110 | HFSC_FSC = 0x2, | 107 | HFSC_FSC = 0x2, |
111 | HFSC_USC = 0x4 | 108 | HFSC_USC = 0x4 |
112 | }; | 109 | }; |
113 | 110 | ||
114 | struct hfsc_class | 111 | struct hfsc_class { |
115 | { | ||
116 | struct Qdisc_class_common cl_common; | 112 | struct Qdisc_class_common cl_common; |
117 | unsigned int refcnt; /* usage count */ | 113 | unsigned int refcnt; /* usage count */ |
118 | 114 | ||
@@ -140,8 +136,8 @@ struct hfsc_class | |||
140 | u64 cl_cumul; /* cumulative work in bytes done by | 136 | u64 cl_cumul; /* cumulative work in bytes done by |
141 | real-time criteria */ | 137 | real-time criteria */ |
142 | 138 | ||
143 | u64 cl_d; /* deadline*/ | 139 | u64 cl_d; /* deadline*/ |
144 | u64 cl_e; /* eligible time */ | 140 | u64 cl_e; /* eligible time */ |
145 | u64 cl_vt; /* virtual time */ | 141 | u64 cl_vt; /* virtual time */ |
146 | u64 cl_f; /* time when this class will fit for | 142 | u64 cl_f; /* time when this class will fit for |
147 | link-sharing, max(myf, cfmin) */ | 143 | link-sharing, max(myf, cfmin) */ |
@@ -176,8 +172,7 @@ struct hfsc_class | |||
176 | unsigned long cl_nactive; /* number of active children */ | 172 | unsigned long cl_nactive; /* number of active children */ |
177 | }; | 173 | }; |
178 | 174 | ||
179 | struct hfsc_sched | 175 | struct hfsc_sched { |
180 | { | ||
181 | u16 defcls; /* default class id */ | 176 | u16 defcls; /* default class id */ |
182 | struct hfsc_class root; /* root class */ | 177 | struct hfsc_class root; /* root class */ |
183 | struct Qdisc_class_hash clhash; /* class hash */ | 178 | struct Qdisc_class_hash clhash; /* class hash */ |
@@ -693,7 +688,7 @@ init_vf(struct hfsc_class *cl, unsigned int len) | |||
693 | if (go_active) { | 688 | if (go_active) { |
694 | n = rb_last(&cl->cl_parent->vt_tree); | 689 | n = rb_last(&cl->cl_parent->vt_tree); |
695 | if (n != NULL) { | 690 | if (n != NULL) { |
696 | max_cl = rb_entry(n, struct hfsc_class,vt_node); | 691 | max_cl = rb_entry(n, struct hfsc_class, vt_node); |
697 | /* | 692 | /* |
698 | * set vt to the average of the min and max | 693 | * set vt to the average of the min and max |
699 | * classes. if the parent's period didn't | 694 | * classes. if the parent's period didn't |
@@ -1177,8 +1172,10 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) | |||
1177 | return NULL; | 1172 | return NULL; |
1178 | } | 1173 | } |
1179 | #endif | 1174 | #endif |
1180 | if ((cl = (struct hfsc_class *)res.class) == NULL) { | 1175 | cl = (struct hfsc_class *)res.class; |
1181 | if ((cl = hfsc_find_class(res.classid, sch)) == NULL) | 1176 | if (!cl) { |
1177 | cl = hfsc_find_class(res.classid, sch); | ||
1178 | if (!cl) | ||
1182 | break; /* filter selected invalid classid */ | 1179 | break; /* filter selected invalid classid */ |
1183 | if (cl->level >= head->level) | 1180 | if (cl->level >= head->level) |
1184 | break; /* filter may only point downwards */ | 1181 | break; /* filter may only point downwards */ |
@@ -1316,7 +1313,7 @@ hfsc_dump_sc(struct sk_buff *skb, int attr, struct internal_sc *sc) | |||
1316 | return -1; | 1313 | return -1; |
1317 | } | 1314 | } |
1318 | 1315 | ||
1319 | static inline int | 1316 | static int |
1320 | hfsc_dump_curves(struct sk_buff *skb, struct hfsc_class *cl) | 1317 | hfsc_dump_curves(struct sk_buff *skb, struct hfsc_class *cl) |
1321 | { | 1318 | { |
1322 | if ((cl->cl_flags & HFSC_RSC) && | 1319 | if ((cl->cl_flags & HFSC_RSC) && |
@@ -1420,7 +1417,8 @@ hfsc_schedule_watchdog(struct Qdisc *sch) | |||
1420 | struct hfsc_class *cl; | 1417 | struct hfsc_class *cl; |
1421 | u64 next_time = 0; | 1418 | u64 next_time = 0; |
1422 | 1419 | ||
1423 | if ((cl = eltree_get_minel(q)) != NULL) | 1420 | cl = eltree_get_minel(q); |
1421 | if (cl) | ||
1424 | next_time = cl->cl_e; | 1422 | next_time = cl->cl_e; |
1425 | if (q->root.cl_cfmin != 0) { | 1423 | if (q->root.cl_cfmin != 0) { |
1426 | if (next_time == 0 || next_time > q->root.cl_cfmin) | 1424 | if (next_time == 0 || next_time > q->root.cl_cfmin) |
@@ -1625,7 +1623,8 @@ hfsc_dequeue(struct Qdisc *sch) | |||
1625 | * find the class with the minimum deadline among | 1623 | * find the class with the minimum deadline among |
1626 | * the eligible classes. | 1624 | * the eligible classes. |
1627 | */ | 1625 | */ |
1628 | if ((cl = eltree_get_mindl(q, cur_time)) != NULL) { | 1626 | cl = eltree_get_mindl(q, cur_time); |
1627 | if (cl) { | ||
1629 | realtime = 1; | 1628 | realtime = 1; |
1630 | } else { | 1629 | } else { |
1631 | /* | 1630 | /* |
@@ -1664,7 +1663,7 @@ hfsc_dequeue(struct Qdisc *sch) | |||
1664 | set_passive(cl); | 1663 | set_passive(cl); |
1665 | } | 1664 | } |
1666 | 1665 | ||
1667 | sch->flags &= ~TCQ_F_THROTTLED; | 1666 | qdisc_unthrottled(sch); |
1668 | qdisc_bstats_update(sch, skb); | 1667 | qdisc_bstats_update(sch, skb); |
1669 | sch->q.qlen--; | 1668 | sch->q.qlen--; |
1670 | 1669 | ||
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index fc12fe6f5597..e1429a85091f 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c | |||
@@ -99,9 +99,10 @@ struct htb_class { | |||
99 | struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */ | 99 | struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */ |
100 | struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */ | 100 | struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */ |
101 | /* When class changes from state 1->2 and disconnects from | 101 | /* When class changes from state 1->2 and disconnects from |
102 | parent's feed then we lost ptr value and start from the | 102 | * parent's feed then we lost ptr value and start from the |
103 | first child again. Here we store classid of the | 103 | * first child again. Here we store classid of the |
104 | last valid ptr (used when ptr is NULL). */ | 104 | * last valid ptr (used when ptr is NULL). |
105 | */ | ||
105 | u32 last_ptr_id[TC_HTB_NUMPRIO]; | 106 | u32 last_ptr_id[TC_HTB_NUMPRIO]; |
106 | } inner; | 107 | } inner; |
107 | } un; | 108 | } un; |
@@ -185,7 +186,7 @@ static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch) | |||
185 | * have no valid leaf we try to use MAJOR:default leaf. It still unsuccessfull | 186 | * have no valid leaf we try to use MAJOR:default leaf. It still unsuccessfull |
186 | * then finish and return direct queue. | 187 | * then finish and return direct queue. |
187 | */ | 188 | */ |
188 | #define HTB_DIRECT (struct htb_class*)-1 | 189 | #define HTB_DIRECT ((struct htb_class *)-1L) |
189 | 190 | ||
190 | static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, | 191 | static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, |
191 | int *qerr) | 192 | int *qerr) |
@@ -197,11 +198,13 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, | |||
197 | int result; | 198 | int result; |
198 | 199 | ||
199 | /* allow to select class by setting skb->priority to valid classid; | 200 | /* allow to select class by setting skb->priority to valid classid; |
200 | note that nfmark can be used too by attaching filter fw with no | 201 | * note that nfmark can be used too by attaching filter fw with no |
201 | rules in it */ | 202 | * rules in it |
203 | */ | ||
202 | if (skb->priority == sch->handle) | 204 | if (skb->priority == sch->handle) |
203 | return HTB_DIRECT; /* X:0 (direct flow) selected */ | 205 | return HTB_DIRECT; /* X:0 (direct flow) selected */ |
204 | if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0) | 206 | cl = htb_find(skb->priority, sch); |
207 | if (cl && cl->level == 0) | ||
205 | return cl; | 208 | return cl; |
206 | 209 | ||
207 | *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; | 210 | *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; |
@@ -216,10 +219,12 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, | |||
216 | return NULL; | 219 | return NULL; |
217 | } | 220 | } |
218 | #endif | 221 | #endif |
219 | if ((cl = (void *)res.class) == NULL) { | 222 | cl = (void *)res.class; |
223 | if (!cl) { | ||
220 | if (res.classid == sch->handle) | 224 | if (res.classid == sch->handle) |
221 | return HTB_DIRECT; /* X:0 (direct flow) */ | 225 | return HTB_DIRECT; /* X:0 (direct flow) */ |
222 | if ((cl = htb_find(res.classid, sch)) == NULL) | 226 | cl = htb_find(res.classid, sch); |
227 | if (!cl) | ||
223 | break; /* filter selected invalid classid */ | 228 | break; /* filter selected invalid classid */ |
224 | } | 229 | } |
225 | if (!cl->level) | 230 | if (!cl->level) |
@@ -378,7 +383,8 @@ static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl) | |||
378 | 383 | ||
379 | if (p->un.inner.feed[prio].rb_node) | 384 | if (p->un.inner.feed[prio].rb_node) |
380 | /* parent already has its feed in use so that | 385 | /* parent already has its feed in use so that |
381 | reset bit in mask as parent is already ok */ | 386 | * reset bit in mask as parent is already ok |
387 | */ | ||
382 | mask &= ~(1 << prio); | 388 | mask &= ~(1 << prio); |
383 | 389 | ||
384 | htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio); | 390 | htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio); |
@@ -413,8 +419,9 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl) | |||
413 | 419 | ||
414 | if (p->un.inner.ptr[prio] == cl->node + prio) { | 420 | if (p->un.inner.ptr[prio] == cl->node + prio) { |
415 | /* we are removing child which is pointed to from | 421 | /* we are removing child which is pointed to from |
416 | parent feed - forget the pointer but remember | 422 | * parent feed - forget the pointer but remember |
417 | classid */ | 423 | * classid |
424 | */ | ||
418 | p->un.inner.last_ptr_id[prio] = cl->common.classid; | 425 | p->un.inner.last_ptr_id[prio] = cl->common.classid; |
419 | p->un.inner.ptr[prio] = NULL; | 426 | p->un.inner.ptr[prio] = NULL; |
420 | } | 427 | } |
@@ -663,8 +670,9 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level, | |||
663 | unsigned long start) | 670 | unsigned long start) |
664 | { | 671 | { |
665 | /* don't run for longer than 2 jiffies; 2 is used instead of | 672 | /* don't run for longer than 2 jiffies; 2 is used instead of |
666 | 1 to simplify things when jiffy is going to be incremented | 673 | * 1 to simplify things when jiffy is going to be incremented |
667 | too soon */ | 674 | * too soon |
675 | */ | ||
668 | unsigned long stop_at = start + 2; | 676 | unsigned long stop_at = start + 2; |
669 | while (time_before(jiffies, stop_at)) { | 677 | while (time_before(jiffies, stop_at)) { |
670 | struct htb_class *cl; | 678 | struct htb_class *cl; |
@@ -687,7 +695,7 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level, | |||
687 | 695 | ||
688 | /* too much load - let's continue after a break for scheduling */ | 696 | /* too much load - let's continue after a break for scheduling */ |
689 | if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) { | 697 | if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) { |
690 | printk(KERN_WARNING "htb: too many events!\n"); | 698 | pr_warning("htb: too many events!\n"); |
691 | q->warned |= HTB_WARN_TOOMANYEVENTS; | 699 | q->warned |= HTB_WARN_TOOMANYEVENTS; |
692 | } | 700 | } |
693 | 701 | ||
@@ -695,7 +703,8 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level, | |||
695 | } | 703 | } |
696 | 704 | ||
697 | /* Returns class->node+prio from id-tree where classe's id is >= id. NULL | 705 | /* Returns class->node+prio from id-tree where classe's id is >= id. NULL |
698 | is no such one exists. */ | 706 | * is no such one exists. |
707 | */ | ||
699 | static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n, | 708 | static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n, |
700 | u32 id) | 709 | u32 id) |
701 | { | 710 | { |
@@ -739,12 +748,14 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio, | |||
739 | for (i = 0; i < 65535; i++) { | 748 | for (i = 0; i < 65535; i++) { |
740 | if (!*sp->pptr && *sp->pid) { | 749 | if (!*sp->pptr && *sp->pid) { |
741 | /* ptr was invalidated but id is valid - try to recover | 750 | /* ptr was invalidated but id is valid - try to recover |
742 | the original or next ptr */ | 751 | * the original or next ptr |
752 | */ | ||
743 | *sp->pptr = | 753 | *sp->pptr = |
744 | htb_id_find_next_upper(prio, sp->root, *sp->pid); | 754 | htb_id_find_next_upper(prio, sp->root, *sp->pid); |
745 | } | 755 | } |
746 | *sp->pid = 0; /* ptr is valid now so that remove this hint as it | 756 | *sp->pid = 0; /* ptr is valid now so that remove this hint as it |
747 | can become out of date quickly */ | 757 | * can become out of date quickly |
758 | */ | ||
748 | if (!*sp->pptr) { /* we are at right end; rewind & go up */ | 759 | if (!*sp->pptr) { /* we are at right end; rewind & go up */ |
749 | *sp->pptr = sp->root; | 760 | *sp->pptr = sp->root; |
750 | while ((*sp->pptr)->rb_left) | 761 | while ((*sp->pptr)->rb_left) |
@@ -772,7 +783,8 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio, | |||
772 | } | 783 | } |
773 | 784 | ||
774 | /* dequeues packet at given priority and level; call only if | 785 | /* dequeues packet at given priority and level; call only if |
775 | you are sure that there is active class at prio/level */ | 786 | * you are sure that there is active class at prio/level |
787 | */ | ||
776 | static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio, | 788 | static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio, |
777 | int level) | 789 | int level) |
778 | { | 790 | { |
@@ -789,9 +801,10 @@ next: | |||
789 | return NULL; | 801 | return NULL; |
790 | 802 | ||
791 | /* class can be empty - it is unlikely but can be true if leaf | 803 | /* class can be empty - it is unlikely but can be true if leaf |
792 | qdisc drops packets in enqueue routine or if someone used | 804 | * qdisc drops packets in enqueue routine or if someone used |
793 | graft operation on the leaf since last dequeue; | 805 | * graft operation on the leaf since last dequeue; |
794 | simply deactivate and skip such class */ | 806 | * simply deactivate and skip such class |
807 | */ | ||
795 | if (unlikely(cl->un.leaf.q->q.qlen == 0)) { | 808 | if (unlikely(cl->un.leaf.q->q.qlen == 0)) { |
796 | struct htb_class *next; | 809 | struct htb_class *next; |
797 | htb_deactivate(q, cl); | 810 | htb_deactivate(q, cl); |
@@ -831,7 +844,8 @@ next: | |||
831 | ptr[0]) + prio); | 844 | ptr[0]) + prio); |
832 | } | 845 | } |
833 | /* this used to be after charge_class but this constelation | 846 | /* this used to be after charge_class but this constelation |
834 | gives us slightly better performance */ | 847 | * gives us slightly better performance |
848 | */ | ||
835 | if (!cl->un.leaf.q->q.qlen) | 849 | if (!cl->un.leaf.q->q.qlen) |
836 | htb_deactivate(q, cl); | 850 | htb_deactivate(q, cl); |
837 | htb_charge_class(q, cl, level, skb); | 851 | htb_charge_class(q, cl, level, skb); |
@@ -852,7 +866,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) | |||
852 | if (skb != NULL) { | 866 | if (skb != NULL) { |
853 | ok: | 867 | ok: |
854 | qdisc_bstats_update(sch, skb); | 868 | qdisc_bstats_update(sch, skb); |
855 | sch->flags &= ~TCQ_F_THROTTLED; | 869 | qdisc_unthrottled(sch); |
856 | sch->q.qlen--; | 870 | sch->q.qlen--; |
857 | return skb; | 871 | return skb; |
858 | } | 872 | } |
@@ -883,6 +897,7 @@ ok: | |||
883 | m = ~q->row_mask[level]; | 897 | m = ~q->row_mask[level]; |
884 | while (m != (int)(-1)) { | 898 | while (m != (int)(-1)) { |
885 | int prio = ffz(m); | 899 | int prio = ffz(m); |
900 | |||
886 | m |= 1 << prio; | 901 | m |= 1 << prio; |
887 | skb = htb_dequeue_tree(q, prio, level); | 902 | skb = htb_dequeue_tree(q, prio, level); |
888 | if (likely(skb != NULL)) | 903 | if (likely(skb != NULL)) |
@@ -987,13 +1002,12 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) | |||
987 | return err; | 1002 | return err; |
988 | 1003 | ||
989 | if (tb[TCA_HTB_INIT] == NULL) { | 1004 | if (tb[TCA_HTB_INIT] == NULL) { |
990 | printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n"); | 1005 | pr_err("HTB: hey probably you have bad tc tool ?\n"); |
991 | return -EINVAL; | 1006 | return -EINVAL; |
992 | } | 1007 | } |
993 | gopt = nla_data(tb[TCA_HTB_INIT]); | 1008 | gopt = nla_data(tb[TCA_HTB_INIT]); |
994 | if (gopt->version != HTB_VER >> 16) { | 1009 | if (gopt->version != HTB_VER >> 16) { |
995 | printk(KERN_ERR | 1010 | pr_err("HTB: need tc/htb version %d (minor is %d), you have %d\n", |
996 | "HTB: need tc/htb version %d (minor is %d), you have %d\n", | ||
997 | HTB_VER >> 16, HTB_VER & 0xffff, gopt->version); | 1011 | HTB_VER >> 16, HTB_VER & 0xffff, gopt->version); |
998 | return -EINVAL; | 1012 | return -EINVAL; |
999 | } | 1013 | } |
@@ -1206,9 +1220,10 @@ static void htb_destroy(struct Qdisc *sch) | |||
1206 | cancel_work_sync(&q->work); | 1220 | cancel_work_sync(&q->work); |
1207 | qdisc_watchdog_cancel(&q->watchdog); | 1221 | qdisc_watchdog_cancel(&q->watchdog); |
1208 | /* This line used to be after htb_destroy_class call below | 1222 | /* This line used to be after htb_destroy_class call below |
1209 | and surprisingly it worked in 2.4. But it must precede it | 1223 | * and surprisingly it worked in 2.4. But it must precede it |
1210 | because filter need its target class alive to be able to call | 1224 | * because filter need its target class alive to be able to call |
1211 | unbind_filter on it (without Oops). */ | 1225 | * unbind_filter on it (without Oops). |
1226 | */ | ||
1212 | tcf_destroy_chain(&q->filter_list); | 1227 | tcf_destroy_chain(&q->filter_list); |
1213 | 1228 | ||
1214 | for (i = 0; i < q->clhash.hashsize; i++) { | 1229 | for (i = 0; i < q->clhash.hashsize; i++) { |
@@ -1342,11 +1357,12 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, | |||
1342 | 1357 | ||
1343 | /* check maximal depth */ | 1358 | /* check maximal depth */ |
1344 | if (parent && parent->parent && parent->parent->level < 2) { | 1359 | if (parent && parent->parent && parent->parent->level < 2) { |
1345 | printk(KERN_ERR "htb: tree is too deep\n"); | 1360 | pr_err("htb: tree is too deep\n"); |
1346 | goto failure; | 1361 | goto failure; |
1347 | } | 1362 | } |
1348 | err = -ENOBUFS; | 1363 | err = -ENOBUFS; |
1349 | if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL) | 1364 | cl = kzalloc(sizeof(*cl), GFP_KERNEL); |
1365 | if (!cl) | ||
1350 | goto failure; | 1366 | goto failure; |
1351 | 1367 | ||
1352 | err = gen_new_estimator(&cl->bstats, &cl->rate_est, | 1368 | err = gen_new_estimator(&cl->bstats, &cl->rate_est, |
@@ -1366,8 +1382,9 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, | |||
1366 | RB_CLEAR_NODE(&cl->node[prio]); | 1382 | RB_CLEAR_NODE(&cl->node[prio]); |
1367 | 1383 | ||
1368 | /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL) | 1384 | /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL) |
1369 | so that can't be used inside of sch_tree_lock | 1385 | * so that can't be used inside of sch_tree_lock |
1370 | -- thanks to Karlis Peisenieks */ | 1386 | * -- thanks to Karlis Peisenieks |
1387 | */ | ||
1371 | new_q = qdisc_create_dflt(sch->dev_queue, | 1388 | new_q = qdisc_create_dflt(sch->dev_queue, |
1372 | &pfifo_qdisc_ops, classid); | 1389 | &pfifo_qdisc_ops, classid); |
1373 | sch_tree_lock(sch); | 1390 | sch_tree_lock(sch); |
@@ -1419,17 +1436,18 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, | |||
1419 | } | 1436 | } |
1420 | 1437 | ||
1421 | /* it used to be a nasty bug here, we have to check that node | 1438 | /* it used to be a nasty bug here, we have to check that node |
1422 | is really leaf before changing cl->un.leaf ! */ | 1439 | * is really leaf before changing cl->un.leaf ! |
1440 | */ | ||
1423 | if (!cl->level) { | 1441 | if (!cl->level) { |
1424 | cl->quantum = rtab->rate.rate / q->rate2quantum; | 1442 | cl->quantum = rtab->rate.rate / q->rate2quantum; |
1425 | if (!hopt->quantum && cl->quantum < 1000) { | 1443 | if (!hopt->quantum && cl->quantum < 1000) { |
1426 | printk(KERN_WARNING | 1444 | pr_warning( |
1427 | "HTB: quantum of class %X is small. Consider r2q change.\n", | 1445 | "HTB: quantum of class %X is small. Consider r2q change.\n", |
1428 | cl->common.classid); | 1446 | cl->common.classid); |
1429 | cl->quantum = 1000; | 1447 | cl->quantum = 1000; |
1430 | } | 1448 | } |
1431 | if (!hopt->quantum && cl->quantum > 200000) { | 1449 | if (!hopt->quantum && cl->quantum > 200000) { |
1432 | printk(KERN_WARNING | 1450 | pr_warning( |
1433 | "HTB: quantum of class %X is big. Consider r2q change.\n", | 1451 | "HTB: quantum of class %X is big. Consider r2q change.\n", |
1434 | cl->common.classid); | 1452 | cl->common.classid); |
1435 | cl->quantum = 200000; | 1453 | cl->quantum = 200000; |
@@ -1478,13 +1496,13 @@ static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent, | |||
1478 | struct htb_class *cl = htb_find(classid, sch); | 1496 | struct htb_class *cl = htb_find(classid, sch); |
1479 | 1497 | ||
1480 | /*if (cl && !cl->level) return 0; | 1498 | /*if (cl && !cl->level) return 0; |
1481 | The line above used to be there to prevent attaching filters to | 1499 | * The line above used to be there to prevent attaching filters to |
1482 | leaves. But at least tc_index filter uses this just to get class | 1500 | * leaves. But at least tc_index filter uses this just to get class |
1483 | for other reasons so that we have to allow for it. | 1501 | * for other reasons so that we have to allow for it. |
1484 | ---- | 1502 | * ---- |
1485 | 19.6.2002 As Werner explained it is ok - bind filter is just | 1503 | * 19.6.2002 As Werner explained it is ok - bind filter is just |
1486 | another way to "lock" the class - unlike "get" this lock can | 1504 | * another way to "lock" the class - unlike "get" this lock can |
1487 | be broken by class during destroy IIUC. | 1505 | * be broken by class during destroy IIUC. |
1488 | */ | 1506 | */ |
1489 | if (cl) | 1507 | if (cl) |
1490 | cl->filter_cnt++; | 1508 | cl->filter_cnt++; |
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index ecc302f4d2a1..ec5cbc848963 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c | |||
@@ -61,7 +61,6 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt) | |||
61 | TC_H_MIN(ntx + 1))); | 61 | TC_H_MIN(ntx + 1))); |
62 | if (qdisc == NULL) | 62 | if (qdisc == NULL) |
63 | goto err; | 63 | goto err; |
64 | qdisc->flags |= TCQ_F_CAN_BYPASS; | ||
65 | priv->qdiscs[ntx] = qdisc; | 64 | priv->qdiscs[ntx] = qdisc; |
66 | } | 65 | } |
67 | 66 | ||
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c new file mode 100644 index 000000000000..fbc6f53cb1b7 --- /dev/null +++ b/net/sched/sch_mqprio.c | |||
@@ -0,0 +1,416 @@ | |||
1 | /* | ||
2 | * net/sched/sch_mqprio.c | ||
3 | * | ||
4 | * Copyright (c) 2010 John Fastabend <john.r.fastabend@intel.com> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * version 2 as published by the Free Software Foundation. | ||
9 | */ | ||
10 | |||
11 | #include <linux/types.h> | ||
12 | #include <linux/slab.h> | ||
13 | #include <linux/kernel.h> | ||
14 | #include <linux/string.h> | ||
15 | #include <linux/errno.h> | ||
16 | #include <linux/skbuff.h> | ||
17 | #include <net/netlink.h> | ||
18 | #include <net/pkt_sched.h> | ||
19 | #include <net/sch_generic.h> | ||
20 | |||
21 | struct mqprio_sched { | ||
22 | struct Qdisc **qdiscs; | ||
23 | int hw_owned; | ||
24 | }; | ||
25 | |||
26 | static void mqprio_destroy(struct Qdisc *sch) | ||
27 | { | ||
28 | struct net_device *dev = qdisc_dev(sch); | ||
29 | struct mqprio_sched *priv = qdisc_priv(sch); | ||
30 | unsigned int ntx; | ||
31 | |||
32 | if (!priv->qdiscs) | ||
33 | return; | ||
34 | |||
35 | for (ntx = 0; ntx < dev->num_tx_queues && priv->qdiscs[ntx]; ntx++) | ||
36 | qdisc_destroy(priv->qdiscs[ntx]); | ||
37 | |||
38 | if (priv->hw_owned && dev->netdev_ops->ndo_setup_tc) | ||
39 | dev->netdev_ops->ndo_setup_tc(dev, 0); | ||
40 | else | ||
41 | netdev_set_num_tc(dev, 0); | ||
42 | |||
43 | kfree(priv->qdiscs); | ||
44 | } | ||
45 | |||
46 | static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt) | ||
47 | { | ||
48 | int i, j; | ||
49 | |||
50 | /* Verify num_tc is not out of max range */ | ||
51 | if (qopt->num_tc > TC_MAX_QUEUE) | ||
52 | return -EINVAL; | ||
53 | |||
54 | /* Verify priority mapping uses valid tcs */ | ||
55 | for (i = 0; i < TC_BITMASK + 1; i++) { | ||
56 | if (qopt->prio_tc_map[i] >= qopt->num_tc) | ||
57 | return -EINVAL; | ||
58 | } | ||
59 | |||
60 | /* net_device does not support requested operation */ | ||
61 | if (qopt->hw && !dev->netdev_ops->ndo_setup_tc) | ||
62 | return -EINVAL; | ||
63 | |||
64 | /* if hw owned qcount and qoffset are taken from LLD so | ||
65 | * no reason to verify them here | ||
66 | */ | ||
67 | if (qopt->hw) | ||
68 | return 0; | ||
69 | |||
70 | for (i = 0; i < qopt->num_tc; i++) { | ||
71 | unsigned int last = qopt->offset[i] + qopt->count[i]; | ||
72 | |||
73 | /* Verify the queue count is in tx range being equal to the | ||
74 | * real_num_tx_queues indicates the last queue is in use. | ||
75 | */ | ||
76 | if (qopt->offset[i] >= dev->real_num_tx_queues || | ||
77 | !qopt->count[i] || | ||
78 | last > dev->real_num_tx_queues) | ||
79 | return -EINVAL; | ||
80 | |||
81 | /* Verify that the offset and counts do not overlap */ | ||
82 | for (j = i + 1; j < qopt->num_tc; j++) { | ||
83 | if (last > qopt->offset[j]) | ||
84 | return -EINVAL; | ||
85 | } | ||
86 | } | ||
87 | |||
88 | return 0; | ||
89 | } | ||
90 | |||
91 | static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) | ||
92 | { | ||
93 | struct net_device *dev = qdisc_dev(sch); | ||
94 | struct mqprio_sched *priv = qdisc_priv(sch); | ||
95 | struct netdev_queue *dev_queue; | ||
96 | struct Qdisc *qdisc; | ||
97 | int i, err = -EOPNOTSUPP; | ||
98 | struct tc_mqprio_qopt *qopt = NULL; | ||
99 | |||
100 | BUILD_BUG_ON(TC_MAX_QUEUE != TC_QOPT_MAX_QUEUE); | ||
101 | BUILD_BUG_ON(TC_BITMASK != TC_QOPT_BITMASK); | ||
102 | |||
103 | if (sch->parent != TC_H_ROOT) | ||
104 | return -EOPNOTSUPP; | ||
105 | |||
106 | if (!netif_is_multiqueue(dev)) | ||
107 | return -EOPNOTSUPP; | ||
108 | |||
109 | if (nla_len(opt) < sizeof(*qopt)) | ||
110 | return -EINVAL; | ||
111 | |||
112 | qopt = nla_data(opt); | ||
113 | if (mqprio_parse_opt(dev, qopt)) | ||
114 | return -EINVAL; | ||
115 | |||
116 | /* pre-allocate qdisc, attachment can't fail */ | ||
117 | priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]), | ||
118 | GFP_KERNEL); | ||
119 | if (priv->qdiscs == NULL) { | ||
120 | err = -ENOMEM; | ||
121 | goto err; | ||
122 | } | ||
123 | |||
124 | for (i = 0; i < dev->num_tx_queues; i++) { | ||
125 | dev_queue = netdev_get_tx_queue(dev, i); | ||
126 | qdisc = qdisc_create_dflt(dev_queue, &pfifo_fast_ops, | ||
127 | TC_H_MAKE(TC_H_MAJ(sch->handle), | ||
128 | TC_H_MIN(i + 1))); | ||
129 | if (qdisc == NULL) { | ||
130 | err = -ENOMEM; | ||
131 | goto err; | ||
132 | } | ||
133 | priv->qdiscs[i] = qdisc; | ||
134 | } | ||
135 | |||
136 | /* If the mqprio options indicate that hardware should own | ||
137 | * the queue mapping then run ndo_setup_tc otherwise use the | ||
138 | * supplied and verified mapping | ||
139 | */ | ||
140 | if (qopt->hw) { | ||
141 | priv->hw_owned = 1; | ||
142 | err = dev->netdev_ops->ndo_setup_tc(dev, qopt->num_tc); | ||
143 | if (err) | ||
144 | goto err; | ||
145 | } else { | ||
146 | netdev_set_num_tc(dev, qopt->num_tc); | ||
147 | for (i = 0; i < qopt->num_tc; i++) | ||
148 | netdev_set_tc_queue(dev, i, | ||
149 | qopt->count[i], qopt->offset[i]); | ||
150 | } | ||
151 | |||
152 | /* Always use supplied priority mappings */ | ||
153 | for (i = 0; i < TC_BITMASK + 1; i++) | ||
154 | netdev_set_prio_tc_map(dev, i, qopt->prio_tc_map[i]); | ||
155 | |||
156 | sch->flags |= TCQ_F_MQROOT; | ||
157 | return 0; | ||
158 | |||
159 | err: | ||
160 | mqprio_destroy(sch); | ||
161 | return err; | ||
162 | } | ||
163 | |||
164 | static void mqprio_attach(struct Qdisc *sch) | ||
165 | { | ||
166 | struct net_device *dev = qdisc_dev(sch); | ||
167 | struct mqprio_sched *priv = qdisc_priv(sch); | ||
168 | struct Qdisc *qdisc; | ||
169 | unsigned int ntx; | ||
170 | |||
171 | /* Attach underlying qdisc */ | ||
172 | for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { | ||
173 | qdisc = priv->qdiscs[ntx]; | ||
174 | qdisc = dev_graft_qdisc(qdisc->dev_queue, qdisc); | ||
175 | if (qdisc) | ||
176 | qdisc_destroy(qdisc); | ||
177 | } | ||
178 | kfree(priv->qdiscs); | ||
179 | priv->qdiscs = NULL; | ||
180 | } | ||
181 | |||
182 | static struct netdev_queue *mqprio_queue_get(struct Qdisc *sch, | ||
183 | unsigned long cl) | ||
184 | { | ||
185 | struct net_device *dev = qdisc_dev(sch); | ||
186 | unsigned long ntx = cl - 1 - netdev_get_num_tc(dev); | ||
187 | |||
188 | if (ntx >= dev->num_tx_queues) | ||
189 | return NULL; | ||
190 | return netdev_get_tx_queue(dev, ntx); | ||
191 | } | ||
192 | |||
193 | static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new, | ||
194 | struct Qdisc **old) | ||
195 | { | ||
196 | struct net_device *dev = qdisc_dev(sch); | ||
197 | struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl); | ||
198 | |||
199 | if (!dev_queue) | ||
200 | return -EINVAL; | ||
201 | |||
202 | if (dev->flags & IFF_UP) | ||
203 | dev_deactivate(dev); | ||
204 | |||
205 | *old = dev_graft_qdisc(dev_queue, new); | ||
206 | |||
207 | if (dev->flags & IFF_UP) | ||
208 | dev_activate(dev); | ||
209 | |||
210 | return 0; | ||
211 | } | ||
212 | |||
213 | static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) | ||
214 | { | ||
215 | struct net_device *dev = qdisc_dev(sch); | ||
216 | struct mqprio_sched *priv = qdisc_priv(sch); | ||
217 | unsigned char *b = skb_tail_pointer(skb); | ||
218 | struct tc_mqprio_qopt opt; | ||
219 | struct Qdisc *qdisc; | ||
220 | unsigned int i; | ||
221 | |||
222 | sch->q.qlen = 0; | ||
223 | memset(&sch->bstats, 0, sizeof(sch->bstats)); | ||
224 | memset(&sch->qstats, 0, sizeof(sch->qstats)); | ||
225 | |||
226 | for (i = 0; i < dev->num_tx_queues; i++) { | ||
227 | qdisc = netdev_get_tx_queue(dev, i)->qdisc; | ||
228 | spin_lock_bh(qdisc_lock(qdisc)); | ||
229 | sch->q.qlen += qdisc->q.qlen; | ||
230 | sch->bstats.bytes += qdisc->bstats.bytes; | ||
231 | sch->bstats.packets += qdisc->bstats.packets; | ||
232 | sch->qstats.qlen += qdisc->qstats.qlen; | ||
233 | sch->qstats.backlog += qdisc->qstats.backlog; | ||
234 | sch->qstats.drops += qdisc->qstats.drops; | ||
235 | sch->qstats.requeues += qdisc->qstats.requeues; | ||
236 | sch->qstats.overlimits += qdisc->qstats.overlimits; | ||
237 | spin_unlock_bh(qdisc_lock(qdisc)); | ||
238 | } | ||
239 | |||
240 | opt.num_tc = netdev_get_num_tc(dev); | ||
241 | memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map)); | ||
242 | opt.hw = priv->hw_owned; | ||
243 | |||
244 | for (i = 0; i < netdev_get_num_tc(dev); i++) { | ||
245 | opt.count[i] = dev->tc_to_txq[i].count; | ||
246 | opt.offset[i] = dev->tc_to_txq[i].offset; | ||
247 | } | ||
248 | |||
249 | NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); | ||
250 | |||
251 | return skb->len; | ||
252 | nla_put_failure: | ||
253 | nlmsg_trim(skb, b); | ||
254 | return -1; | ||
255 | } | ||
256 | |||
257 | static struct Qdisc *mqprio_leaf(struct Qdisc *sch, unsigned long cl) | ||
258 | { | ||
259 | struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl); | ||
260 | |||
261 | if (!dev_queue) | ||
262 | return NULL; | ||
263 | |||
264 | return dev_queue->qdisc_sleeping; | ||
265 | } | ||
266 | |||
267 | static unsigned long mqprio_get(struct Qdisc *sch, u32 classid) | ||
268 | { | ||
269 | struct net_device *dev = qdisc_dev(sch); | ||
270 | unsigned int ntx = TC_H_MIN(classid); | ||
271 | |||
272 | if (ntx > dev->num_tx_queues + netdev_get_num_tc(dev)) | ||
273 | return 0; | ||
274 | return ntx; | ||
275 | } | ||
276 | |||
277 | static void mqprio_put(struct Qdisc *sch, unsigned long cl) | ||
278 | { | ||
279 | } | ||
280 | |||
281 | static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl, | ||
282 | struct sk_buff *skb, struct tcmsg *tcm) | ||
283 | { | ||
284 | struct net_device *dev = qdisc_dev(sch); | ||
285 | |||
286 | if (cl <= netdev_get_num_tc(dev)) { | ||
287 | tcm->tcm_parent = TC_H_ROOT; | ||
288 | tcm->tcm_info = 0; | ||
289 | } else { | ||
290 | int i; | ||
291 | struct netdev_queue *dev_queue; | ||
292 | |||
293 | dev_queue = mqprio_queue_get(sch, cl); | ||
294 | tcm->tcm_parent = 0; | ||
295 | for (i = 0; i < netdev_get_num_tc(dev); i++) { | ||
296 | struct netdev_tc_txq tc = dev->tc_to_txq[i]; | ||
297 | int q_idx = cl - netdev_get_num_tc(dev); | ||
298 | |||
299 | if (q_idx > tc.offset && | ||
300 | q_idx <= tc.offset + tc.count) { | ||
301 | tcm->tcm_parent = | ||
302 | TC_H_MAKE(TC_H_MAJ(sch->handle), | ||
303 | TC_H_MIN(i + 1)); | ||
304 | break; | ||
305 | } | ||
306 | } | ||
307 | tcm->tcm_info = dev_queue->qdisc_sleeping->handle; | ||
308 | } | ||
309 | tcm->tcm_handle |= TC_H_MIN(cl); | ||
310 | return 0; | ||
311 | } | ||
312 | |||
313 | static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, | ||
314 | struct gnet_dump *d) | ||
315 | { | ||
316 | struct net_device *dev = qdisc_dev(sch); | ||
317 | |||
318 | if (cl <= netdev_get_num_tc(dev)) { | ||
319 | int i; | ||
320 | struct Qdisc *qdisc; | ||
321 | struct gnet_stats_queue qstats = {0}; | ||
322 | struct gnet_stats_basic_packed bstats = {0}; | ||
323 | struct netdev_tc_txq tc = dev->tc_to_txq[cl - 1]; | ||
324 | |||
325 | /* Drop lock here it will be reclaimed before touching | ||
326 | * statistics this is required because the d->lock we | ||
327 | * hold here is the look on dev_queue->qdisc_sleeping | ||
328 | * also acquired below. | ||
329 | */ | ||
330 | spin_unlock_bh(d->lock); | ||
331 | |||
332 | for (i = tc.offset; i < tc.offset + tc.count; i++) { | ||
333 | qdisc = netdev_get_tx_queue(dev, i)->qdisc; | ||
334 | spin_lock_bh(qdisc_lock(qdisc)); | ||
335 | bstats.bytes += qdisc->bstats.bytes; | ||
336 | bstats.packets += qdisc->bstats.packets; | ||
337 | qstats.qlen += qdisc->qstats.qlen; | ||
338 | qstats.backlog += qdisc->qstats.backlog; | ||
339 | qstats.drops += qdisc->qstats.drops; | ||
340 | qstats.requeues += qdisc->qstats.requeues; | ||
341 | qstats.overlimits += qdisc->qstats.overlimits; | ||
342 | spin_unlock_bh(qdisc_lock(qdisc)); | ||
343 | } | ||
344 | /* Reclaim root sleeping lock before completing stats */ | ||
345 | spin_lock_bh(d->lock); | ||
346 | if (gnet_stats_copy_basic(d, &bstats) < 0 || | ||
347 | gnet_stats_copy_queue(d, &qstats) < 0) | ||
348 | return -1; | ||
349 | } else { | ||
350 | struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl); | ||
351 | |||
352 | sch = dev_queue->qdisc_sleeping; | ||
353 | sch->qstats.qlen = sch->q.qlen; | ||
354 | if (gnet_stats_copy_basic(d, &sch->bstats) < 0 || | ||
355 | gnet_stats_copy_queue(d, &sch->qstats) < 0) | ||
356 | return -1; | ||
357 | } | ||
358 | return 0; | ||
359 | } | ||
360 | |||
361 | static void mqprio_walk(struct Qdisc *sch, struct qdisc_walker *arg) | ||
362 | { | ||
363 | struct net_device *dev = qdisc_dev(sch); | ||
364 | unsigned long ntx; | ||
365 | |||
366 | if (arg->stop) | ||
367 | return; | ||
368 | |||
369 | /* Walk hierarchy with a virtual class per tc */ | ||
370 | arg->count = arg->skip; | ||
371 | for (ntx = arg->skip; | ||
372 | ntx < dev->num_tx_queues + netdev_get_num_tc(dev); | ||
373 | ntx++) { | ||
374 | if (arg->fn(sch, ntx + 1, arg) < 0) { | ||
375 | arg->stop = 1; | ||
376 | break; | ||
377 | } | ||
378 | arg->count++; | ||
379 | } | ||
380 | } | ||
381 | |||
382 | static const struct Qdisc_class_ops mqprio_class_ops = { | ||
383 | .graft = mqprio_graft, | ||
384 | .leaf = mqprio_leaf, | ||
385 | .get = mqprio_get, | ||
386 | .put = mqprio_put, | ||
387 | .walk = mqprio_walk, | ||
388 | .dump = mqprio_dump_class, | ||
389 | .dump_stats = mqprio_dump_class_stats, | ||
390 | }; | ||
391 | |||
392 | struct Qdisc_ops mqprio_qdisc_ops __read_mostly = { | ||
393 | .cl_ops = &mqprio_class_ops, | ||
394 | .id = "mqprio", | ||
395 | .priv_size = sizeof(struct mqprio_sched), | ||
396 | .init = mqprio_init, | ||
397 | .destroy = mqprio_destroy, | ||
398 | .attach = mqprio_attach, | ||
399 | .dump = mqprio_dump, | ||
400 | .owner = THIS_MODULE, | ||
401 | }; | ||
402 | |||
403 | static int __init mqprio_module_init(void) | ||
404 | { | ||
405 | return register_qdisc(&mqprio_qdisc_ops); | ||
406 | } | ||
407 | |||
408 | static void __exit mqprio_module_exit(void) | ||
409 | { | ||
410 | unregister_qdisc(&mqprio_qdisc_ops); | ||
411 | } | ||
412 | |||
413 | module_init(mqprio_module_init); | ||
414 | module_exit(mqprio_module_exit); | ||
415 | |||
416 | MODULE_LICENSE("GPL"); | ||
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 436a2e75b322..edc1950e0e77 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c | |||
@@ -156,7 +156,7 @@ static unsigned int multiq_drop(struct Qdisc *sch) | |||
156 | unsigned int len; | 156 | unsigned int len; |
157 | struct Qdisc *qdisc; | 157 | struct Qdisc *qdisc; |
158 | 158 | ||
159 | for (band = q->bands-1; band >= 0; band--) { | 159 | for (band = q->bands - 1; band >= 0; band--) { |
160 | qdisc = q->queues[band]; | 160 | qdisc = q->queues[band]; |
161 | if (qdisc->ops->drop) { | 161 | if (qdisc->ops->drop) { |
162 | len = qdisc->ops->drop(qdisc); | 162 | len = qdisc->ops->drop(qdisc); |
@@ -265,7 +265,7 @@ static int multiq_init(struct Qdisc *sch, struct nlattr *opt) | |||
265 | for (i = 0; i < q->max_bands; i++) | 265 | for (i = 0; i < q->max_bands; i++) |
266 | q->queues[i] = &noop_qdisc; | 266 | q->queues[i] = &noop_qdisc; |
267 | 267 | ||
268 | err = multiq_tune(sch,opt); | 268 | err = multiq_tune(sch, opt); |
269 | 269 | ||
270 | if (err) | 270 | if (err) |
271 | kfree(q->queues); | 271 | kfree(q->queues); |
@@ -346,7 +346,7 @@ static int multiq_dump_class(struct Qdisc *sch, unsigned long cl, | |||
346 | struct multiq_sched_data *q = qdisc_priv(sch); | 346 | struct multiq_sched_data *q = qdisc_priv(sch); |
347 | 347 | ||
348 | tcm->tcm_handle |= TC_H_MIN(cl); | 348 | tcm->tcm_handle |= TC_H_MIN(cl); |
349 | tcm->tcm_info = q->queues[cl-1]->handle; | 349 | tcm->tcm_info = q->queues[cl - 1]->handle; |
350 | return 0; | 350 | return 0; |
351 | } | 351 | } |
352 | 352 | ||
@@ -378,7 +378,7 @@ static void multiq_walk(struct Qdisc *sch, struct qdisc_walker *arg) | |||
378 | arg->count++; | 378 | arg->count++; |
379 | continue; | 379 | continue; |
380 | } | 380 | } |
381 | if (arg->fn(sch, band+1, arg) < 0) { | 381 | if (arg->fn(sch, band + 1, arg) < 0) { |
382 | arg->stop = 1; | 382 | arg->stop = 1; |
383 | break; | 383 | break; |
384 | } | 384 | } |
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 6a3006b38dc5..64f0d3293b49 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c | |||
@@ -211,8 +211,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) | |||
211 | } | 211 | } |
212 | 212 | ||
213 | cb = netem_skb_cb(skb); | 213 | cb = netem_skb_cb(skb); |
214 | if (q->gap == 0 || /* not doing reordering */ | 214 | if (q->gap == 0 || /* not doing reordering */ |
215 | q->counter < q->gap || /* inside last reordering gap */ | 215 | q->counter < q->gap || /* inside last reordering gap */ |
216 | q->reorder < get_crandom(&q->reorder_cor)) { | 216 | q->reorder < get_crandom(&q->reorder_cor)) { |
217 | psched_time_t now; | 217 | psched_time_t now; |
218 | psched_tdiff_t delay; | 218 | psched_tdiff_t delay; |
@@ -248,7 +248,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) | |||
248 | return ret; | 248 | return ret; |
249 | } | 249 | } |
250 | 250 | ||
251 | static unsigned int netem_drop(struct Qdisc* sch) | 251 | static unsigned int netem_drop(struct Qdisc *sch) |
252 | { | 252 | { |
253 | struct netem_sched_data *q = qdisc_priv(sch); | 253 | struct netem_sched_data *q = qdisc_priv(sch); |
254 | unsigned int len = 0; | 254 | unsigned int len = 0; |
@@ -265,7 +265,7 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) | |||
265 | struct netem_sched_data *q = qdisc_priv(sch); | 265 | struct netem_sched_data *q = qdisc_priv(sch); |
266 | struct sk_buff *skb; | 266 | struct sk_buff *skb; |
267 | 267 | ||
268 | if (sch->flags & TCQ_F_THROTTLED) | 268 | if (qdisc_is_throttled(sch)) |
269 | return NULL; | 269 | return NULL; |
270 | 270 | ||
271 | skb = q->qdisc->ops->peek(q->qdisc); | 271 | skb = q->qdisc->ops->peek(q->qdisc); |
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index fbd710d619bf..2a318f2dc3e5 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c | |||
@@ -22,8 +22,7 @@ | |||
22 | #include <net/pkt_sched.h> | 22 | #include <net/pkt_sched.h> |
23 | 23 | ||
24 | 24 | ||
25 | struct prio_sched_data | 25 | struct prio_sched_data { |
26 | { | ||
27 | int bands; | 26 | int bands; |
28 | struct tcf_proto *filter_list; | 27 | struct tcf_proto *filter_list; |
29 | u8 prio2band[TC_PRIO_MAX+1]; | 28 | u8 prio2band[TC_PRIO_MAX+1]; |
@@ -54,7 +53,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) | |||
54 | if (!q->filter_list || err < 0) { | 53 | if (!q->filter_list || err < 0) { |
55 | if (TC_H_MAJ(band)) | 54 | if (TC_H_MAJ(band)) |
56 | band = 0; | 55 | band = 0; |
57 | return q->queues[q->prio2band[band&TC_PRIO_MAX]]; | 56 | return q->queues[q->prio2band[band & TC_PRIO_MAX]]; |
58 | } | 57 | } |
59 | band = res.classid; | 58 | band = res.classid; |
60 | } | 59 | } |
@@ -106,7 +105,7 @@ static struct sk_buff *prio_peek(struct Qdisc *sch) | |||
106 | return NULL; | 105 | return NULL; |
107 | } | 106 | } |
108 | 107 | ||
109 | static struct sk_buff *prio_dequeue(struct Qdisc* sch) | 108 | static struct sk_buff *prio_dequeue(struct Qdisc *sch) |
110 | { | 109 | { |
111 | struct prio_sched_data *q = qdisc_priv(sch); | 110 | struct prio_sched_data *q = qdisc_priv(sch); |
112 | int prio; | 111 | int prio; |
@@ -124,7 +123,7 @@ static struct sk_buff *prio_dequeue(struct Qdisc* sch) | |||
124 | 123 | ||
125 | } | 124 | } |
126 | 125 | ||
127 | static unsigned int prio_drop(struct Qdisc* sch) | 126 | static unsigned int prio_drop(struct Qdisc *sch) |
128 | { | 127 | { |
129 | struct prio_sched_data *q = qdisc_priv(sch); | 128 | struct prio_sched_data *q = qdisc_priv(sch); |
130 | int prio; | 129 | int prio; |
@@ -143,24 +142,24 @@ static unsigned int prio_drop(struct Qdisc* sch) | |||
143 | 142 | ||
144 | 143 | ||
145 | static void | 144 | static void |
146 | prio_reset(struct Qdisc* sch) | 145 | prio_reset(struct Qdisc *sch) |
147 | { | 146 | { |
148 | int prio; | 147 | int prio; |
149 | struct prio_sched_data *q = qdisc_priv(sch); | 148 | struct prio_sched_data *q = qdisc_priv(sch); |
150 | 149 | ||
151 | for (prio=0; prio<q->bands; prio++) | 150 | for (prio = 0; prio < q->bands; prio++) |
152 | qdisc_reset(q->queues[prio]); | 151 | qdisc_reset(q->queues[prio]); |
153 | sch->q.qlen = 0; | 152 | sch->q.qlen = 0; |
154 | } | 153 | } |
155 | 154 | ||
156 | static void | 155 | static void |
157 | prio_destroy(struct Qdisc* sch) | 156 | prio_destroy(struct Qdisc *sch) |
158 | { | 157 | { |
159 | int prio; | 158 | int prio; |
160 | struct prio_sched_data *q = qdisc_priv(sch); | 159 | struct prio_sched_data *q = qdisc_priv(sch); |
161 | 160 | ||
162 | tcf_destroy_chain(&q->filter_list); | 161 | tcf_destroy_chain(&q->filter_list); |
163 | for (prio=0; prio<q->bands; prio++) | 162 | for (prio = 0; prio < q->bands; prio++) |
164 | qdisc_destroy(q->queues[prio]); | 163 | qdisc_destroy(q->queues[prio]); |
165 | } | 164 | } |
166 | 165 | ||
@@ -177,7 +176,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) | |||
177 | if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2) | 176 | if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2) |
178 | return -EINVAL; | 177 | return -EINVAL; |
179 | 178 | ||
180 | for (i=0; i<=TC_PRIO_MAX; i++) { | 179 | for (i = 0; i <= TC_PRIO_MAX; i++) { |
181 | if (qopt->priomap[i] >= qopt->bands) | 180 | if (qopt->priomap[i] >= qopt->bands) |
182 | return -EINVAL; | 181 | return -EINVAL; |
183 | } | 182 | } |
@@ -186,7 +185,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) | |||
186 | q->bands = qopt->bands; | 185 | q->bands = qopt->bands; |
187 | memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); | 186 | memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); |
188 | 187 | ||
189 | for (i=q->bands; i<TCQ_PRIO_BANDS; i++) { | 188 | for (i = q->bands; i < TCQ_PRIO_BANDS; i++) { |
190 | struct Qdisc *child = q->queues[i]; | 189 | struct Qdisc *child = q->queues[i]; |
191 | q->queues[i] = &noop_qdisc; | 190 | q->queues[i] = &noop_qdisc; |
192 | if (child != &noop_qdisc) { | 191 | if (child != &noop_qdisc) { |
@@ -196,9 +195,10 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) | |||
196 | } | 195 | } |
197 | sch_tree_unlock(sch); | 196 | sch_tree_unlock(sch); |
198 | 197 | ||
199 | for (i=0; i<q->bands; i++) { | 198 | for (i = 0; i < q->bands; i++) { |
200 | if (q->queues[i] == &noop_qdisc) { | 199 | if (q->queues[i] == &noop_qdisc) { |
201 | struct Qdisc *child, *old; | 200 | struct Qdisc *child, *old; |
201 | |||
202 | child = qdisc_create_dflt(sch->dev_queue, | 202 | child = qdisc_create_dflt(sch->dev_queue, |
203 | &pfifo_qdisc_ops, | 203 | &pfifo_qdisc_ops, |
204 | TC_H_MAKE(sch->handle, i + 1)); | 204 | TC_H_MAKE(sch->handle, i + 1)); |
@@ -224,7 +224,7 @@ static int prio_init(struct Qdisc *sch, struct nlattr *opt) | |||
224 | struct prio_sched_data *q = qdisc_priv(sch); | 224 | struct prio_sched_data *q = qdisc_priv(sch); |
225 | int i; | 225 | int i; |
226 | 226 | ||
227 | for (i=0; i<TCQ_PRIO_BANDS; i++) | 227 | for (i = 0; i < TCQ_PRIO_BANDS; i++) |
228 | q->queues[i] = &noop_qdisc; | 228 | q->queues[i] = &noop_qdisc; |
229 | 229 | ||
230 | if (opt == NULL) { | 230 | if (opt == NULL) { |
@@ -232,7 +232,7 @@ static int prio_init(struct Qdisc *sch, struct nlattr *opt) | |||
232 | } else { | 232 | } else { |
233 | int err; | 233 | int err; |
234 | 234 | ||
235 | if ((err= prio_tune(sch, opt)) != 0) | 235 | if ((err = prio_tune(sch, opt)) != 0) |
236 | return err; | 236 | return err; |
237 | } | 237 | } |
238 | return 0; | 238 | return 0; |
@@ -245,7 +245,7 @@ static int prio_dump(struct Qdisc *sch, struct sk_buff *skb) | |||
245 | struct tc_prio_qopt opt; | 245 | struct tc_prio_qopt opt; |
246 | 246 | ||
247 | opt.bands = q->bands; | 247 | opt.bands = q->bands; |
248 | memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX+1); | 248 | memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX + 1); |
249 | 249 | ||
250 | NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); | 250 | NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); |
251 | 251 | ||
@@ -342,7 +342,7 @@ static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg) | |||
342 | arg->count++; | 342 | arg->count++; |
343 | continue; | 343 | continue; |
344 | } | 344 | } |
345 | if (arg->fn(sch, prio+1, arg) < 0) { | 345 | if (arg->fn(sch, prio + 1, arg) < 0) { |
346 | arg->stop = 1; | 346 | arg->stop = 1; |
347 | break; | 347 | break; |
348 | } | 348 | } |
@@ -350,7 +350,7 @@ static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg) | |||
350 | } | 350 | } |
351 | } | 351 | } |
352 | 352 | ||
353 | static struct tcf_proto ** prio_find_tcf(struct Qdisc *sch, unsigned long cl) | 353 | static struct tcf_proto **prio_find_tcf(struct Qdisc *sch, unsigned long cl) |
354 | { | 354 | { |
355 | struct prio_sched_data *q = qdisc_priv(sch); | 355 | struct prio_sched_data *q = qdisc_priv(sch); |
356 | 356 | ||
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 9f98dbd32d4c..6649463da1b6 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c | |||
@@ -36,8 +36,7 @@ | |||
36 | if RED works correctly. | 36 | if RED works correctly. |
37 | */ | 37 | */ |
38 | 38 | ||
39 | struct red_sched_data | 39 | struct red_sched_data { |
40 | { | ||
41 | u32 limit; /* HARD maximal queue length */ | 40 | u32 limit; /* HARD maximal queue length */ |
42 | unsigned char flags; | 41 | unsigned char flags; |
43 | struct red_parms parms; | 42 | struct red_parms parms; |
@@ -55,7 +54,7 @@ static inline int red_use_harddrop(struct red_sched_data *q) | |||
55 | return q->flags & TC_RED_HARDDROP; | 54 | return q->flags & TC_RED_HARDDROP; |
56 | } | 55 | } |
57 | 56 | ||
58 | static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch) | 57 | static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch) |
59 | { | 58 | { |
60 | struct red_sched_data *q = qdisc_priv(sch); | 59 | struct red_sched_data *q = qdisc_priv(sch); |
61 | struct Qdisc *child = q->qdisc; | 60 | struct Qdisc *child = q->qdisc; |
@@ -67,29 +66,29 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch) | |||
67 | red_end_of_idle_period(&q->parms); | 66 | red_end_of_idle_period(&q->parms); |
68 | 67 | ||
69 | switch (red_action(&q->parms, q->parms.qavg)) { | 68 | switch (red_action(&q->parms, q->parms.qavg)) { |
70 | case RED_DONT_MARK: | 69 | case RED_DONT_MARK: |
71 | break; | 70 | break; |
72 | 71 | ||
73 | case RED_PROB_MARK: | 72 | case RED_PROB_MARK: |
74 | sch->qstats.overlimits++; | 73 | sch->qstats.overlimits++; |
75 | if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) { | 74 | if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) { |
76 | q->stats.prob_drop++; | 75 | q->stats.prob_drop++; |
77 | goto congestion_drop; | 76 | goto congestion_drop; |
78 | } | 77 | } |
79 | 78 | ||
80 | q->stats.prob_mark++; | 79 | q->stats.prob_mark++; |
81 | break; | 80 | break; |
82 | 81 | ||
83 | case RED_HARD_MARK: | 82 | case RED_HARD_MARK: |
84 | sch->qstats.overlimits++; | 83 | sch->qstats.overlimits++; |
85 | if (red_use_harddrop(q) || !red_use_ecn(q) || | 84 | if (red_use_harddrop(q) || !red_use_ecn(q) || |
86 | !INET_ECN_set_ce(skb)) { | 85 | !INET_ECN_set_ce(skb)) { |
87 | q->stats.forced_drop++; | 86 | q->stats.forced_drop++; |
88 | goto congestion_drop; | 87 | goto congestion_drop; |
89 | } | 88 | } |
90 | 89 | ||
91 | q->stats.forced_mark++; | 90 | q->stats.forced_mark++; |
92 | break; | 91 | break; |
93 | } | 92 | } |
94 | 93 | ||
95 | ret = qdisc_enqueue(skb, child); | 94 | ret = qdisc_enqueue(skb, child); |
@@ -106,7 +105,7 @@ congestion_drop: | |||
106 | return NET_XMIT_CN; | 105 | return NET_XMIT_CN; |
107 | } | 106 | } |
108 | 107 | ||
109 | static struct sk_buff * red_dequeue(struct Qdisc* sch) | 108 | static struct sk_buff *red_dequeue(struct Qdisc *sch) |
110 | { | 109 | { |
111 | struct sk_buff *skb; | 110 | struct sk_buff *skb; |
112 | struct red_sched_data *q = qdisc_priv(sch); | 111 | struct red_sched_data *q = qdisc_priv(sch); |
@@ -123,7 +122,7 @@ static struct sk_buff * red_dequeue(struct Qdisc* sch) | |||
123 | return skb; | 122 | return skb; |
124 | } | 123 | } |
125 | 124 | ||
126 | static struct sk_buff * red_peek(struct Qdisc* sch) | 125 | static struct sk_buff *red_peek(struct Qdisc *sch) |
127 | { | 126 | { |
128 | struct red_sched_data *q = qdisc_priv(sch); | 127 | struct red_sched_data *q = qdisc_priv(sch); |
129 | struct Qdisc *child = q->qdisc; | 128 | struct Qdisc *child = q->qdisc; |
@@ -131,7 +130,7 @@ static struct sk_buff * red_peek(struct Qdisc* sch) | |||
131 | return child->ops->peek(child); | 130 | return child->ops->peek(child); |
132 | } | 131 | } |
133 | 132 | ||
134 | static unsigned int red_drop(struct Qdisc* sch) | 133 | static unsigned int red_drop(struct Qdisc *sch) |
135 | { | 134 | { |
136 | struct red_sched_data *q = qdisc_priv(sch); | 135 | struct red_sched_data *q = qdisc_priv(sch); |
137 | struct Qdisc *child = q->qdisc; | 136 | struct Qdisc *child = q->qdisc; |
@@ -150,7 +149,7 @@ static unsigned int red_drop(struct Qdisc* sch) | |||
150 | return 0; | 149 | return 0; |
151 | } | 150 | } |
152 | 151 | ||
153 | static void red_reset(struct Qdisc* sch) | 152 | static void red_reset(struct Qdisc *sch) |
154 | { | 153 | { |
155 | struct red_sched_data *q = qdisc_priv(sch); | 154 | struct red_sched_data *q = qdisc_priv(sch); |
156 | 155 | ||
@@ -217,7 +216,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt) | |||
217 | return 0; | 216 | return 0; |
218 | } | 217 | } |
219 | 218 | ||
220 | static int red_init(struct Qdisc* sch, struct nlattr *opt) | 219 | static int red_init(struct Qdisc *sch, struct nlattr *opt) |
221 | { | 220 | { |
222 | struct red_sched_data *q = qdisc_priv(sch); | 221 | struct red_sched_data *q = qdisc_priv(sch); |
223 | 222 | ||
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index edea8cefec6c..4cff44235773 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/skbuff.h> | 21 | #include <linux/skbuff.h> |
22 | #include <linux/jhash.h> | 22 | #include <linux/jhash.h> |
23 | #include <linux/slab.h> | 23 | #include <linux/slab.h> |
24 | #include <linux/vmalloc.h> | ||
24 | #include <net/ip.h> | 25 | #include <net/ip.h> |
25 | #include <net/netlink.h> | 26 | #include <net/netlink.h> |
26 | #include <net/pkt_sched.h> | 27 | #include <net/pkt_sched.h> |
@@ -76,7 +77,8 @@ | |||
76 | #define SFQ_DEPTH 128 /* max number of packets per flow */ | 77 | #define SFQ_DEPTH 128 /* max number of packets per flow */ |
77 | #define SFQ_SLOTS 128 /* max number of flows */ | 78 | #define SFQ_SLOTS 128 /* max number of flows */ |
78 | #define SFQ_EMPTY_SLOT 255 | 79 | #define SFQ_EMPTY_SLOT 255 |
79 | #define SFQ_HASH_DIVISOR 1024 | 80 | #define SFQ_DEFAULT_HASH_DIVISOR 1024 |
81 | |||
80 | /* We use 16 bits to store allot, and want to handle packets up to 64K | 82 | /* We use 16 bits to store allot, and want to handle packets up to 64K |
81 | * Scale allot by 8 (1<<3) so that no overflow occurs. | 83 | * Scale allot by 8 (1<<3) so that no overflow occurs. |
82 | */ | 84 | */ |
@@ -92,8 +94,7 @@ typedef unsigned char sfq_index; | |||
92 | * while following values [SFQ_SLOTS ... SFQ_SLOTS + SFQ_DEPTH - 1] | 94 | * while following values [SFQ_SLOTS ... SFQ_SLOTS + SFQ_DEPTH - 1] |
93 | * are 'pointers' to dep[] array | 95 | * are 'pointers' to dep[] array |
94 | */ | 96 | */ |
95 | struct sfq_head | 97 | struct sfq_head { |
96 | { | ||
97 | sfq_index next; | 98 | sfq_index next; |
98 | sfq_index prev; | 99 | sfq_index prev; |
99 | }; | 100 | }; |
@@ -108,13 +109,12 @@ struct sfq_slot { | |||
108 | short allot; /* credit for this slot */ | 109 | short allot; /* credit for this slot */ |
109 | }; | 110 | }; |
110 | 111 | ||
111 | struct sfq_sched_data | 112 | struct sfq_sched_data { |
112 | { | ||
113 | /* Parameters */ | 113 | /* Parameters */ |
114 | int perturb_period; | 114 | int perturb_period; |
115 | unsigned quantum; /* Allotment per round: MUST BE >= MTU */ | 115 | unsigned int quantum; /* Allotment per round: MUST BE >= MTU */ |
116 | int limit; | 116 | int limit; |
117 | 117 | unsigned int divisor; /* number of slots in hash table */ | |
118 | /* Variables */ | 118 | /* Variables */ |
119 | struct tcf_proto *filter_list; | 119 | struct tcf_proto *filter_list; |
120 | struct timer_list perturb_timer; | 120 | struct timer_list perturb_timer; |
@@ -122,7 +122,7 @@ struct sfq_sched_data | |||
122 | sfq_index cur_depth; /* depth of longest slot */ | 122 | sfq_index cur_depth; /* depth of longest slot */ |
123 | unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */ | 123 | unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */ |
124 | struct sfq_slot *tail; /* current slot in round */ | 124 | struct sfq_slot *tail; /* current slot in round */ |
125 | sfq_index ht[SFQ_HASH_DIVISOR]; /* Hash table */ | 125 | sfq_index *ht; /* Hash table (divisor slots) */ |
126 | struct sfq_slot slots[SFQ_SLOTS]; | 126 | struct sfq_slot slots[SFQ_SLOTS]; |
127 | struct sfq_head dep[SFQ_DEPTH]; /* Linked list of slots, indexed by depth */ | 127 | struct sfq_head dep[SFQ_DEPTH]; /* Linked list of slots, indexed by depth */ |
128 | }; | 128 | }; |
@@ -137,12 +137,12 @@ static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index | |||
137 | return &q->dep[val - SFQ_SLOTS]; | 137 | return &q->dep[val - SFQ_SLOTS]; |
138 | } | 138 | } |
139 | 139 | ||
140 | static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1) | 140 | static unsigned int sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1) |
141 | { | 141 | { |
142 | return jhash_2words(h, h1, q->perturbation) & (SFQ_HASH_DIVISOR - 1); | 142 | return jhash_2words(h, h1, q->perturbation) & (q->divisor - 1); |
143 | } | 143 | } |
144 | 144 | ||
145 | static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) | 145 | static unsigned int sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) |
146 | { | 146 | { |
147 | u32 h, h2; | 147 | u32 h, h2; |
148 | 148 | ||
@@ -157,13 +157,13 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) | |||
157 | iph = ip_hdr(skb); | 157 | iph = ip_hdr(skb); |
158 | h = (__force u32)iph->daddr; | 158 | h = (__force u32)iph->daddr; |
159 | h2 = (__force u32)iph->saddr ^ iph->protocol; | 159 | h2 = (__force u32)iph->saddr ^ iph->protocol; |
160 | if (iph->frag_off & htons(IP_MF|IP_OFFSET)) | 160 | if (iph->frag_off & htons(IP_MF | IP_OFFSET)) |
161 | break; | 161 | break; |
162 | poff = proto_ports_offset(iph->protocol); | 162 | poff = proto_ports_offset(iph->protocol); |
163 | if (poff >= 0 && | 163 | if (poff >= 0 && |
164 | pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) { | 164 | pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) { |
165 | iph = ip_hdr(skb); | 165 | iph = ip_hdr(skb); |
166 | h2 ^= *(u32*)((void *)iph + iph->ihl * 4 + poff); | 166 | h2 ^= *(u32 *)((void *)iph + iph->ihl * 4 + poff); |
167 | } | 167 | } |
168 | break; | 168 | break; |
169 | } | 169 | } |
@@ -181,7 +181,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) | |||
181 | if (poff >= 0 && | 181 | if (poff >= 0 && |
182 | pskb_network_may_pull(skb, sizeof(*iph) + 4 + poff)) { | 182 | pskb_network_may_pull(skb, sizeof(*iph) + 4 + poff)) { |
183 | iph = ipv6_hdr(skb); | 183 | iph = ipv6_hdr(skb); |
184 | h2 ^= *(u32*)((void *)iph + sizeof(*iph) + poff); | 184 | h2 ^= *(u32 *)((void *)iph + sizeof(*iph) + poff); |
185 | } | 185 | } |
186 | break; | 186 | break; |
187 | } | 187 | } |
@@ -203,7 +203,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, | |||
203 | 203 | ||
204 | if (TC_H_MAJ(skb->priority) == sch->handle && | 204 | if (TC_H_MAJ(skb->priority) == sch->handle && |
205 | TC_H_MIN(skb->priority) > 0 && | 205 | TC_H_MIN(skb->priority) > 0 && |
206 | TC_H_MIN(skb->priority) <= SFQ_HASH_DIVISOR) | 206 | TC_H_MIN(skb->priority) <= q->divisor) |
207 | return TC_H_MIN(skb->priority); | 207 | return TC_H_MIN(skb->priority); |
208 | 208 | ||
209 | if (!q->filter_list) | 209 | if (!q->filter_list) |
@@ -221,7 +221,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, | |||
221 | return 0; | 221 | return 0; |
222 | } | 222 | } |
223 | #endif | 223 | #endif |
224 | if (TC_H_MIN(res.classid) <= SFQ_HASH_DIVISOR) | 224 | if (TC_H_MIN(res.classid) <= q->divisor) |
225 | return TC_H_MIN(res.classid); | 225 | return TC_H_MIN(res.classid); |
226 | } | 226 | } |
227 | return 0; | 227 | return 0; |
@@ -497,7 +497,11 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) | |||
497 | q->perturb_period = ctl->perturb_period * HZ; | 497 | q->perturb_period = ctl->perturb_period * HZ; |
498 | if (ctl->limit) | 498 | if (ctl->limit) |
499 | q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1); | 499 | q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1); |
500 | 500 | if (ctl->divisor) { | |
501 | if (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536) | ||
502 | return -EINVAL; | ||
503 | q->divisor = ctl->divisor; | ||
504 | } | ||
501 | qlen = sch->q.qlen; | 505 | qlen = sch->q.qlen; |
502 | while (sch->q.qlen > q->limit) | 506 | while (sch->q.qlen > q->limit) |
503 | sfq_drop(sch); | 507 | sfq_drop(sch); |
@@ -515,15 +519,13 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) | |||
515 | static int sfq_init(struct Qdisc *sch, struct nlattr *opt) | 519 | static int sfq_init(struct Qdisc *sch, struct nlattr *opt) |
516 | { | 520 | { |
517 | struct sfq_sched_data *q = qdisc_priv(sch); | 521 | struct sfq_sched_data *q = qdisc_priv(sch); |
522 | size_t sz; | ||
518 | int i; | 523 | int i; |
519 | 524 | ||
520 | q->perturb_timer.function = sfq_perturbation; | 525 | q->perturb_timer.function = sfq_perturbation; |
521 | q->perturb_timer.data = (unsigned long)sch; | 526 | q->perturb_timer.data = (unsigned long)sch; |
522 | init_timer_deferrable(&q->perturb_timer); | 527 | init_timer_deferrable(&q->perturb_timer); |
523 | 528 | ||
524 | for (i = 0; i < SFQ_HASH_DIVISOR; i++) | ||
525 | q->ht[i] = SFQ_EMPTY_SLOT; | ||
526 | |||
527 | for (i = 0; i < SFQ_DEPTH; i++) { | 529 | for (i = 0; i < SFQ_DEPTH; i++) { |
528 | q->dep[i].next = i + SFQ_SLOTS; | 530 | q->dep[i].next = i + SFQ_SLOTS; |
529 | q->dep[i].prev = i + SFQ_SLOTS; | 531 | q->dep[i].prev = i + SFQ_SLOTS; |
@@ -532,6 +534,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) | |||
532 | q->limit = SFQ_DEPTH - 1; | 534 | q->limit = SFQ_DEPTH - 1; |
533 | q->cur_depth = 0; | 535 | q->cur_depth = 0; |
534 | q->tail = NULL; | 536 | q->tail = NULL; |
537 | q->divisor = SFQ_DEFAULT_HASH_DIVISOR; | ||
535 | if (opt == NULL) { | 538 | if (opt == NULL) { |
536 | q->quantum = psched_mtu(qdisc_dev(sch)); | 539 | q->quantum = psched_mtu(qdisc_dev(sch)); |
537 | q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); | 540 | q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); |
@@ -543,10 +546,23 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) | |||
543 | return err; | 546 | return err; |
544 | } | 547 | } |
545 | 548 | ||
549 | sz = sizeof(q->ht[0]) * q->divisor; | ||
550 | q->ht = kmalloc(sz, GFP_KERNEL); | ||
551 | if (!q->ht && sz > PAGE_SIZE) | ||
552 | q->ht = vmalloc(sz); | ||
553 | if (!q->ht) | ||
554 | return -ENOMEM; | ||
555 | for (i = 0; i < q->divisor; i++) | ||
556 | q->ht[i] = SFQ_EMPTY_SLOT; | ||
557 | |||
546 | for (i = 0; i < SFQ_SLOTS; i++) { | 558 | for (i = 0; i < SFQ_SLOTS; i++) { |
547 | slot_queue_init(&q->slots[i]); | 559 | slot_queue_init(&q->slots[i]); |
548 | sfq_link(q, i); | 560 | sfq_link(q, i); |
549 | } | 561 | } |
562 | if (q->limit >= 1) | ||
563 | sch->flags |= TCQ_F_CAN_BYPASS; | ||
564 | else | ||
565 | sch->flags &= ~TCQ_F_CAN_BYPASS; | ||
550 | return 0; | 566 | return 0; |
551 | } | 567 | } |
552 | 568 | ||
@@ -557,6 +573,10 @@ static void sfq_destroy(struct Qdisc *sch) | |||
557 | tcf_destroy_chain(&q->filter_list); | 573 | tcf_destroy_chain(&q->filter_list); |
558 | q->perturb_period = 0; | 574 | q->perturb_period = 0; |
559 | del_timer_sync(&q->perturb_timer); | 575 | del_timer_sync(&q->perturb_timer); |
576 | if (is_vmalloc_addr(q->ht)) | ||
577 | vfree(q->ht); | ||
578 | else | ||
579 | kfree(q->ht); | ||
560 | } | 580 | } |
561 | 581 | ||
562 | static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb) | 582 | static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb) |
@@ -569,7 +589,7 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb) | |||
569 | opt.perturb_period = q->perturb_period / HZ; | 589 | opt.perturb_period = q->perturb_period / HZ; |
570 | 590 | ||
571 | opt.limit = q->limit; | 591 | opt.limit = q->limit; |
572 | opt.divisor = SFQ_HASH_DIVISOR; | 592 | opt.divisor = q->divisor; |
573 | opt.flows = q->limit; | 593 | opt.flows = q->limit; |
574 | 594 | ||
575 | NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); | 595 | NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); |
@@ -594,6 +614,8 @@ static unsigned long sfq_get(struct Qdisc *sch, u32 classid) | |||
594 | static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent, | 614 | static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent, |
595 | u32 classid) | 615 | u32 classid) |
596 | { | 616 | { |
617 | /* we cannot bypass queue discipline anymore */ | ||
618 | sch->flags &= ~TCQ_F_CAN_BYPASS; | ||
597 | return 0; | 619 | return 0; |
598 | } | 620 | } |
599 | 621 | ||
@@ -647,7 +669,7 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg) | |||
647 | if (arg->stop) | 669 | if (arg->stop) |
648 | return; | 670 | return; |
649 | 671 | ||
650 | for (i = 0; i < SFQ_HASH_DIVISOR; i++) { | 672 | for (i = 0; i < q->divisor; i++) { |
651 | if (q->ht[i] == SFQ_EMPTY_SLOT || | 673 | if (q->ht[i] == SFQ_EMPTY_SLOT || |
652 | arg->count < arg->skip) { | 674 | arg->count < arg->skip) { |
653 | arg->count++; | 675 | arg->count++; |
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index e93165820c3f..1dcfb5223a86 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c | |||
@@ -97,8 +97,7 @@ | |||
97 | changed the limit is not effective anymore. | 97 | changed the limit is not effective anymore. |
98 | */ | 98 | */ |
99 | 99 | ||
100 | struct tbf_sched_data | 100 | struct tbf_sched_data { |
101 | { | ||
102 | /* Parameters */ | 101 | /* Parameters */ |
103 | u32 limit; /* Maximal length of backlog: bytes */ | 102 | u32 limit; /* Maximal length of backlog: bytes */ |
104 | u32 buffer; /* Token bucket depth/rate: MUST BE >= MTU/B */ | 103 | u32 buffer; /* Token bucket depth/rate: MUST BE >= MTU/B */ |
@@ -115,10 +114,10 @@ struct tbf_sched_data | |||
115 | struct qdisc_watchdog watchdog; /* Watchdog timer */ | 114 | struct qdisc_watchdog watchdog; /* Watchdog timer */ |
116 | }; | 115 | }; |
117 | 116 | ||
118 | #define L2T(q,L) qdisc_l2t((q)->R_tab,L) | 117 | #define L2T(q, L) qdisc_l2t((q)->R_tab, L) |
119 | #define L2T_P(q,L) qdisc_l2t((q)->P_tab,L) | 118 | #define L2T_P(q, L) qdisc_l2t((q)->P_tab, L) |
120 | 119 | ||
121 | static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch) | 120 | static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch) |
122 | { | 121 | { |
123 | struct tbf_sched_data *q = qdisc_priv(sch); | 122 | struct tbf_sched_data *q = qdisc_priv(sch); |
124 | int ret; | 123 | int ret; |
@@ -137,7 +136,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch) | |||
137 | return NET_XMIT_SUCCESS; | 136 | return NET_XMIT_SUCCESS; |
138 | } | 137 | } |
139 | 138 | ||
140 | static unsigned int tbf_drop(struct Qdisc* sch) | 139 | static unsigned int tbf_drop(struct Qdisc *sch) |
141 | { | 140 | { |
142 | struct tbf_sched_data *q = qdisc_priv(sch); | 141 | struct tbf_sched_data *q = qdisc_priv(sch); |
143 | unsigned int len = 0; | 142 | unsigned int len = 0; |
@@ -149,7 +148,7 @@ static unsigned int tbf_drop(struct Qdisc* sch) | |||
149 | return len; | 148 | return len; |
150 | } | 149 | } |
151 | 150 | ||
152 | static struct sk_buff *tbf_dequeue(struct Qdisc* sch) | 151 | static struct sk_buff *tbf_dequeue(struct Qdisc *sch) |
153 | { | 152 | { |
154 | struct tbf_sched_data *q = qdisc_priv(sch); | 153 | struct tbf_sched_data *q = qdisc_priv(sch); |
155 | struct sk_buff *skb; | 154 | struct sk_buff *skb; |
@@ -185,7 +184,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch) | |||
185 | q->tokens = toks; | 184 | q->tokens = toks; |
186 | q->ptokens = ptoks; | 185 | q->ptokens = ptoks; |
187 | sch->q.qlen--; | 186 | sch->q.qlen--; |
188 | sch->flags &= ~TCQ_F_THROTTLED; | 187 | qdisc_unthrottled(sch); |
189 | qdisc_bstats_update(sch, skb); | 188 | qdisc_bstats_update(sch, skb); |
190 | return skb; | 189 | return skb; |
191 | } | 190 | } |
@@ -209,7 +208,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch) | |||
209 | return NULL; | 208 | return NULL; |
210 | } | 209 | } |
211 | 210 | ||
212 | static void tbf_reset(struct Qdisc* sch) | 211 | static void tbf_reset(struct Qdisc *sch) |
213 | { | 212 | { |
214 | struct tbf_sched_data *q = qdisc_priv(sch); | 213 | struct tbf_sched_data *q = qdisc_priv(sch); |
215 | 214 | ||
@@ -227,7 +226,7 @@ static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = { | |||
227 | [TCA_TBF_PTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, | 226 | [TCA_TBF_PTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, |
228 | }; | 227 | }; |
229 | 228 | ||
230 | static int tbf_change(struct Qdisc* sch, struct nlattr *opt) | 229 | static int tbf_change(struct Qdisc *sch, struct nlattr *opt) |
231 | { | 230 | { |
232 | int err; | 231 | int err; |
233 | struct tbf_sched_data *q = qdisc_priv(sch); | 232 | struct tbf_sched_data *q = qdisc_priv(sch); |
@@ -236,7 +235,7 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt) | |||
236 | struct qdisc_rate_table *rtab = NULL; | 235 | struct qdisc_rate_table *rtab = NULL; |
237 | struct qdisc_rate_table *ptab = NULL; | 236 | struct qdisc_rate_table *ptab = NULL; |
238 | struct Qdisc *child = NULL; | 237 | struct Qdisc *child = NULL; |
239 | int max_size,n; | 238 | int max_size, n; |
240 | 239 | ||
241 | err = nla_parse_nested(tb, TCA_TBF_PTAB, opt, tbf_policy); | 240 | err = nla_parse_nested(tb, TCA_TBF_PTAB, opt, tbf_policy); |
242 | if (err < 0) | 241 | if (err < 0) |
@@ -259,15 +258,18 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt) | |||
259 | } | 258 | } |
260 | 259 | ||
261 | for (n = 0; n < 256; n++) | 260 | for (n = 0; n < 256; n++) |
262 | if (rtab->data[n] > qopt->buffer) break; | 261 | if (rtab->data[n] > qopt->buffer) |
263 | max_size = (n << qopt->rate.cell_log)-1; | 262 | break; |
263 | max_size = (n << qopt->rate.cell_log) - 1; | ||
264 | if (ptab) { | 264 | if (ptab) { |
265 | int size; | 265 | int size; |
266 | 266 | ||
267 | for (n = 0; n < 256; n++) | 267 | for (n = 0; n < 256; n++) |
268 | if (ptab->data[n] > qopt->mtu) break; | 268 | if (ptab->data[n] > qopt->mtu) |
269 | size = (n << qopt->peakrate.cell_log)-1; | 269 | break; |
270 | if (size < max_size) max_size = size; | 270 | size = (n << qopt->peakrate.cell_log) - 1; |
271 | if (size < max_size) | ||
272 | max_size = size; | ||
271 | } | 273 | } |
272 | if (max_size < 0) | 274 | if (max_size < 0) |
273 | goto done; | 275 | goto done; |
@@ -310,7 +312,7 @@ done: | |||
310 | return err; | 312 | return err; |
311 | } | 313 | } |
312 | 314 | ||
313 | static int tbf_init(struct Qdisc* sch, struct nlattr *opt) | 315 | static int tbf_init(struct Qdisc *sch, struct nlattr *opt) |
314 | { | 316 | { |
315 | struct tbf_sched_data *q = qdisc_priv(sch); | 317 | struct tbf_sched_data *q = qdisc_priv(sch); |
316 | 318 | ||
@@ -422,8 +424,7 @@ static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker) | |||
422 | } | 424 | } |
423 | } | 425 | } |
424 | 426 | ||
425 | static const struct Qdisc_class_ops tbf_class_ops = | 427 | static const struct Qdisc_class_ops tbf_class_ops = { |
426 | { | ||
427 | .graft = tbf_graft, | 428 | .graft = tbf_graft, |
428 | .leaf = tbf_leaf, | 429 | .leaf = tbf_leaf, |
429 | .get = tbf_get, | 430 | .get = tbf_get, |
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index d84e7329660f..45cd30098e34 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c | |||
@@ -53,8 +53,7 @@ | |||
53 | which will not break load balancing, though native slave | 53 | which will not break load balancing, though native slave |
54 | traffic will have the highest priority. */ | 54 | traffic will have the highest priority. */ |
55 | 55 | ||
56 | struct teql_master | 56 | struct teql_master { |
57 | { | ||
58 | struct Qdisc_ops qops; | 57 | struct Qdisc_ops qops; |
59 | struct net_device *dev; | 58 | struct net_device *dev; |
60 | struct Qdisc *slaves; | 59 | struct Qdisc *slaves; |
@@ -65,22 +64,21 @@ struct teql_master | |||
65 | unsigned long tx_dropped; | 64 | unsigned long tx_dropped; |
66 | }; | 65 | }; |
67 | 66 | ||
68 | struct teql_sched_data | 67 | struct teql_sched_data { |
69 | { | ||
70 | struct Qdisc *next; | 68 | struct Qdisc *next; |
71 | struct teql_master *m; | 69 | struct teql_master *m; |
72 | struct neighbour *ncache; | 70 | struct neighbour *ncache; |
73 | struct sk_buff_head q; | 71 | struct sk_buff_head q; |
74 | }; | 72 | }; |
75 | 73 | ||
76 | #define NEXT_SLAVE(q) (((struct teql_sched_data*)qdisc_priv(q))->next) | 74 | #define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next) |
77 | 75 | ||
78 | #define FMASK (IFF_BROADCAST|IFF_POINTOPOINT) | 76 | #define FMASK (IFF_BROADCAST | IFF_POINTOPOINT) |
79 | 77 | ||
80 | /* "teql*" qdisc routines */ | 78 | /* "teql*" qdisc routines */ |
81 | 79 | ||
82 | static int | 80 | static int |
83 | teql_enqueue(struct sk_buff *skb, struct Qdisc* sch) | 81 | teql_enqueue(struct sk_buff *skb, struct Qdisc *sch) |
84 | { | 82 | { |
85 | struct net_device *dev = qdisc_dev(sch); | 83 | struct net_device *dev = qdisc_dev(sch); |
86 | struct teql_sched_data *q = qdisc_priv(sch); | 84 | struct teql_sched_data *q = qdisc_priv(sch); |
@@ -96,7 +94,7 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc* sch) | |||
96 | } | 94 | } |
97 | 95 | ||
98 | static struct sk_buff * | 96 | static struct sk_buff * |
99 | teql_dequeue(struct Qdisc* sch) | 97 | teql_dequeue(struct Qdisc *sch) |
100 | { | 98 | { |
101 | struct teql_sched_data *dat = qdisc_priv(sch); | 99 | struct teql_sched_data *dat = qdisc_priv(sch); |
102 | struct netdev_queue *dat_queue; | 100 | struct netdev_queue *dat_queue; |
@@ -118,13 +116,13 @@ teql_dequeue(struct Qdisc* sch) | |||
118 | } | 116 | } |
119 | 117 | ||
120 | static struct sk_buff * | 118 | static struct sk_buff * |
121 | teql_peek(struct Qdisc* sch) | 119 | teql_peek(struct Qdisc *sch) |
122 | { | 120 | { |
123 | /* teql is meant to be used as root qdisc */ | 121 | /* teql is meant to be used as root qdisc */ |
124 | return NULL; | 122 | return NULL; |
125 | } | 123 | } |
126 | 124 | ||
127 | static __inline__ void | 125 | static inline void |
128 | teql_neigh_release(struct neighbour *n) | 126 | teql_neigh_release(struct neighbour *n) |
129 | { | 127 | { |
130 | if (n) | 128 | if (n) |
@@ -132,7 +130,7 @@ teql_neigh_release(struct neighbour *n) | |||
132 | } | 130 | } |
133 | 131 | ||
134 | static void | 132 | static void |
135 | teql_reset(struct Qdisc* sch) | 133 | teql_reset(struct Qdisc *sch) |
136 | { | 134 | { |
137 | struct teql_sched_data *dat = qdisc_priv(sch); | 135 | struct teql_sched_data *dat = qdisc_priv(sch); |
138 | 136 | ||
@@ -142,13 +140,14 @@ teql_reset(struct Qdisc* sch) | |||
142 | } | 140 | } |
143 | 141 | ||
144 | static void | 142 | static void |
145 | teql_destroy(struct Qdisc* sch) | 143 | teql_destroy(struct Qdisc *sch) |
146 | { | 144 | { |
147 | struct Qdisc *q, *prev; | 145 | struct Qdisc *q, *prev; |
148 | struct teql_sched_data *dat = qdisc_priv(sch); | 146 | struct teql_sched_data *dat = qdisc_priv(sch); |
149 | struct teql_master *master = dat->m; | 147 | struct teql_master *master = dat->m; |
150 | 148 | ||
151 | if ((prev = master->slaves) != NULL) { | 149 | prev = master->slaves; |
150 | if (prev) { | ||
152 | do { | 151 | do { |
153 | q = NEXT_SLAVE(prev); | 152 | q = NEXT_SLAVE(prev); |
154 | if (q == sch) { | 153 | if (q == sch) { |
@@ -180,7 +179,7 @@ teql_destroy(struct Qdisc* sch) | |||
180 | static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt) | 179 | static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt) |
181 | { | 180 | { |
182 | struct net_device *dev = qdisc_dev(sch); | 181 | struct net_device *dev = qdisc_dev(sch); |
183 | struct teql_master *m = (struct teql_master*)sch->ops; | 182 | struct teql_master *m = (struct teql_master *)sch->ops; |
184 | struct teql_sched_data *q = qdisc_priv(sch); | 183 | struct teql_sched_data *q = qdisc_priv(sch); |
185 | 184 | ||
186 | if (dev->hard_header_len > m->dev->hard_header_len) | 185 | if (dev->hard_header_len > m->dev->hard_header_len) |
@@ -291,7 +290,8 @@ restart: | |||
291 | nores = 0; | 290 | nores = 0; |
292 | busy = 0; | 291 | busy = 0; |
293 | 292 | ||
294 | if ((q = start) == NULL) | 293 | q = start; |
294 | if (!q) | ||
295 | goto drop; | 295 | goto drop; |
296 | 296 | ||
297 | do { | 297 | do { |
@@ -356,10 +356,10 @@ drop: | |||
356 | 356 | ||
357 | static int teql_master_open(struct net_device *dev) | 357 | static int teql_master_open(struct net_device *dev) |
358 | { | 358 | { |
359 | struct Qdisc * q; | 359 | struct Qdisc *q; |
360 | struct teql_master *m = netdev_priv(dev); | 360 | struct teql_master *m = netdev_priv(dev); |
361 | int mtu = 0xFFFE; | 361 | int mtu = 0xFFFE; |
362 | unsigned flags = IFF_NOARP|IFF_MULTICAST; | 362 | unsigned int flags = IFF_NOARP | IFF_MULTICAST; |
363 | 363 | ||
364 | if (m->slaves == NULL) | 364 | if (m->slaves == NULL) |
365 | return -EUNATCH; | 365 | return -EUNATCH; |
@@ -427,7 +427,7 @@ static int teql_master_mtu(struct net_device *dev, int new_mtu) | |||
427 | do { | 427 | do { |
428 | if (new_mtu > qdisc_dev(q)->mtu) | 428 | if (new_mtu > qdisc_dev(q)->mtu) |
429 | return -EINVAL; | 429 | return -EINVAL; |
430 | } while ((q=NEXT_SLAVE(q)) != m->slaves); | 430 | } while ((q = NEXT_SLAVE(q)) != m->slaves); |
431 | } | 431 | } |
432 | 432 | ||
433 | dev->mtu = new_mtu; | 433 | dev->mtu = new_mtu; |
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index dd419d286204..d8d98d5b508c 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c | |||
@@ -1475,6 +1475,12 @@ restart: | |||
1475 | goto out_free; | 1475 | goto out_free; |
1476 | } | 1476 | } |
1477 | 1477 | ||
1478 | if (sk_filter(other, skb) < 0) { | ||
1479 | /* Toss the packet but do not return any error to the sender */ | ||
1480 | err = len; | ||
1481 | goto out_free; | ||
1482 | } | ||
1483 | |||
1478 | unix_state_lock(other); | 1484 | unix_state_lock(other); |
1479 | err = -EPERM; | 1485 | err = -EPERM; |
1480 | if (!unix_may_send(sk, other)) | 1486 | if (!unix_may_send(sk, other)) |
@@ -1978,36 +1984,38 @@ static int unix_shutdown(struct socket *sock, int mode) | |||
1978 | 1984 | ||
1979 | mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN); | 1985 | mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN); |
1980 | 1986 | ||
1981 | if (mode) { | 1987 | if (!mode) |
1982 | unix_state_lock(sk); | 1988 | return 0; |
1983 | sk->sk_shutdown |= mode; | 1989 | |
1984 | other = unix_peer(sk); | 1990 | unix_state_lock(sk); |
1985 | if (other) | 1991 | sk->sk_shutdown |= mode; |
1986 | sock_hold(other); | 1992 | other = unix_peer(sk); |
1987 | unix_state_unlock(sk); | 1993 | if (other) |
1988 | sk->sk_state_change(sk); | 1994 | sock_hold(other); |
1989 | 1995 | unix_state_unlock(sk); | |
1990 | if (other && | 1996 | sk->sk_state_change(sk); |
1991 | (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) { | 1997 | |
1992 | 1998 | if (other && | |
1993 | int peer_mode = 0; | 1999 | (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) { |
1994 | 2000 | ||
1995 | if (mode&RCV_SHUTDOWN) | 2001 | int peer_mode = 0; |
1996 | peer_mode |= SEND_SHUTDOWN; | 2002 | |
1997 | if (mode&SEND_SHUTDOWN) | 2003 | if (mode&RCV_SHUTDOWN) |
1998 | peer_mode |= RCV_SHUTDOWN; | 2004 | peer_mode |= SEND_SHUTDOWN; |
1999 | unix_state_lock(other); | 2005 | if (mode&SEND_SHUTDOWN) |
2000 | other->sk_shutdown |= peer_mode; | 2006 | peer_mode |= RCV_SHUTDOWN; |
2001 | unix_state_unlock(other); | 2007 | unix_state_lock(other); |
2002 | other->sk_state_change(other); | 2008 | other->sk_shutdown |= peer_mode; |
2003 | if (peer_mode == SHUTDOWN_MASK) | 2009 | unix_state_unlock(other); |
2004 | sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP); | 2010 | other->sk_state_change(other); |
2005 | else if (peer_mode & RCV_SHUTDOWN) | 2011 | if (peer_mode == SHUTDOWN_MASK) |
2006 | sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN); | 2012 | sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP); |
2007 | } | 2013 | else if (peer_mode & RCV_SHUTDOWN) |
2008 | if (other) | 2014 | sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN); |
2009 | sock_put(other); | ||
2010 | } | 2015 | } |
2016 | if (other) | ||
2017 | sock_put(other); | ||
2018 | |||
2011 | return 0; | 2019 | return 0; |
2012 | } | 2020 | } |
2013 | 2021 | ||
diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c index 74944a2dd436..788a12c1eb5d 100644 --- a/net/wanrouter/wanmain.c +++ b/net/wanrouter/wanmain.c | |||
@@ -59,8 +59,6 @@ | |||
59 | #include <asm/uaccess.h> /* copy_to/from_user */ | 59 | #include <asm/uaccess.h> /* copy_to/from_user */ |
60 | #include <linux/init.h> /* __initfunc et al. */ | 60 | #include <linux/init.h> /* __initfunc et al. */ |
61 | 61 | ||
62 | #define KMEM_SAFETYZONE 8 | ||
63 | |||
64 | #define DEV_TO_SLAVE(dev) (*((struct net_device **)netdev_priv(dev))) | 62 | #define DEV_TO_SLAVE(dev) (*((struct net_device **)netdev_priv(dev))) |
65 | 63 | ||
66 | /* | 64 | /* |